Skip to content
Snippets Groups Projects
Commit b9ba6fbd authored by Blaise Li's avatar Blaise Li
Browse files

Use external script PRO-seq_trim_and_dedup.sh

The code in the snakefile produced weird results.
parent 194442a9
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ Snakefile to analyse PRO-seq data. ...@@ -3,6 +3,7 @@ Snakefile to analyse PRO-seq data.
""" """
import sys import sys
major, minor = sys.version_info[:2] major, minor = sys.version_info[:2]
# TODO: What if major > 3 and minor < 6 ?
if major < 3 or minor < 6: if major < 3 or minor < 6:
sys.exit("Need at least python 3.6\n") sys.exit("Need at least python 3.6\n")
...@@ -294,59 +295,60 @@ rule trim_and_dedup: ...@@ -294,59 +295,60 @@ rule trim_and_dedup:
log = OPJ(log_dir, "{trimmer}", "trim_and_dedup", "{lib}_{treat}_{rep}.log"), log = OPJ(log_dir, "{trimmer}", "trim_and_dedup", "{lib}_{treat}_{rep}.log"),
err = OPJ(log_dir, "{trimmer}", "trim_and_dedup", "{lib}_{treat}_{rep}.err"), err = OPJ(log_dir, "{trimmer}", "trim_and_dedup", "{lib}_{treat}_{rep}.err"),
run: run:
if wildcards.trimmer == "cutadapt": if True:
#if wildcards.trimmer == "cutadapt":
shell_commands = """ shell_commands = """
#PRO-seq_trim_and_dedup.sh {wildcards.trimmer} {input} {params.adapter} {params.trim5} {params.trim3} {output.adapt} {output.noadapt} {log.trim} {output.nb_raw} {output.nb_adapt} {output.nb_adapt_deduped} {output.nb_noadapt} {output.nb_noadapt_deduped} 1> {log.log} 2> {log.err} PRO-seq_trim_and_dedup.sh {wildcards.trimmer} {input} {params.adapter} {params.trim5} {params.trim3} {output.adapt} {output.noadapt} {log.trim} {output.nb_raw} {output.nb_adapt} {output.nb_adapt_deduped} {output.nb_noadapt} {output.nb_noadapt_deduped} 1> {log.log} 2> {log.err}
# This named pipe is used to avoid writing the intermediate file to disk # This named pipe is used to avoid writing the intermediate file to disk
# It will transmit reads that did not seem to contain the adapter to the # It will transmit reads that did not seem to contain the adapter to the
# second sorting and deduplicating. # second sorting and deduplicating.
mkfifo {params.untrimmed_tmp} #mkfifo {params.untrimmed_tmp}
# -m {params.minsize_trimmed} is to discard reads that are shorter than this after trimming # -m {params.minsize_trimmed} is to discard reads that are shorter than this after trimming
# a second cutadapt step removes the random nucleotides that helped identify PCR duplicates. # a second cutadapt step removes the random nucleotides that helped identify PCR duplicates.
zcat {input} \\ #zcat {input} \\
| tee >(count_fastq_reads {output.nb_raw}) \\ # | tee >(count_fastq_reads {output.nb_raw}) \\
| cutadapt -a {params.adapter} -m {params.minsize_trimmed} \\ # | cutadapt -a {params.adapter} -m {params.minsize_trimmed} \\
--untrimmed-output={params.untrimmed_tmp} - 2> {log.trim} \\ # --untrimmed-output={params.untrimmed_tmp} - 2> {log.trim} \\
| tee >(count_fastq_reads {output.nb_adapt}) \\ # | tee >(count_fastq_reads {output.nb_adapt}) \\
| dedup \\ # | dedup \\
| tee >(count_fastq_reads {output.nb_adapt_deduped}) \\ # | trim_random_nt {params.trim5} {params.trim3} \\
| trim_random_nt {params.trim5} {params.trim3} \\ # | tee >(count_fastq_reads {output.nb_adapt_deduped}) \\
| gzip > {output.noadapt} & # | gzip > {output.noadapt} &
pid_to_wait=$! #pid_to_wait=$!
cat {params.untrimmed_tmp} \\ #cat {params.untrimmed_tmp} \\
| tee >(count_fastq_reads {output.nb_noadapt}) \\ # | tee >(count_fastq_reads {output.nb_noadapt}) \\
| dedup \\ # | dedup \\
| tee >(count_fastq_reads {output.nb_noadapt_deduped}) \\ # | trim_random_nt {params.trim5} {params.trim3} \\
| trim_random_nt {params.trim5} {params.trim3} \\ # | tee >(count_fastq_reads {output.nb_noadapt_deduped}) \\
| gzip > {output.adapt} \\ # | gzip > {output.adapt} \\
|| rm -f {params.untrimmed_tmp} # || rm -f {params.untrimmed_tmp}
wait ${{pid_to_wait}} #wait ${{pid_to_wait}}
rm -f {params.untrimmed_tmp} #rm -f {params.untrimmed_tmp}
""" """
elif wildcards.trimmer == "fastx_clipper": # elif wildcards.trimmer == "fastx_clipper":
shell_commands = """ # shell_commands = """
zcat {input} \\ #zcat {input} \\
| tee >(count_fastq_reads {output.nb_raw}) \\ # | tee >(count_fastq_reads {output.nb_raw}) \\
| fastx_clipper -a {params.adapter} -l {params.minsize_trimmed} \\ # | fastx_clipper -a {params.adapter} -l {params.minsize_trimmed} \\
-c -M 3 -n -v 2> {log.trim} \\ # -c -M 3 -n -v 2> {log.trim} \\
| tee >(count_fastq_reads {output.nb_adapt}) \\ # | tee >(count_fastq_reads {output.nb_adapt}) \\
| dedup \\ # | dedup \\
| tee >(count_fastq_reads {output.nb_adapt_deduped}) \\ # | tee >(count_fastq_reads {output.nb_adapt_deduped}) \\
| trim_random_nt {params.trim5} {params.trim3} \\ # | trim_random_nt {params.trim5} {params.trim3} \\
| gzip > {output.noadapt} & # | gzip > {output.noadapt} &
pid_to_wait=$! #pid_to_wait=$!
zcat {input} \\ #zcat {input} \\
| fastx_clipper -a {params.adapter} -l {params.minsize_trimmed} \\ # | fastx_clipper -a {params.adapter} -l {params.minsize_trimmed} \\
-C -M 3 -n -v 2>> {log.trim} \\ # -C -M 3 -n -v 2>> {log.trim} \\
| tee >(count_fastq_reads {output.nb_noadapt}) \\ # | tee >(count_fastq_reads {output.nb_noadapt}) \\
| dedup \\ # | dedup \\
| tee >(count_fastq_reads {output.nb_noadapt_deduped}) \\ # | tee >(count_fastq_reads {output.nb_noadapt_deduped}) \\
| trim_random_nt {params.trim5} {params.trim3} \\ # | trim_random_nt {params.trim5} {params.trim3} \\
| gzip > {output.adapt} # | gzip > {output.adapt}
wait ${{pid_to_wait}} #wait ${{pid_to_wait}}
""" #"""
else: # else:
raise NotImplementedError("%s is not yet handled." % wildcards.trimmer) # raise NotImplementedError("%s is not yet handled." % wildcards.trimmer)
shell(shell_commands) shell(shell_commands)
......
...@@ -111,7 +111,7 @@ dedup_trimmed() ...@@ -111,7 +111,7 @@ dedup_trimmed()
dedup_untrimmed() dedup_untrimmed()
{ {
# $1: file in which to write the number of fastq records after deduplication # $1: file in which to write the number of fastq records after deduplication
cmd="cat - | dedup | cutadapt -u -${threeprime_random} -u ${fiveprime_random} - | tee >(count_fastq_reads ${1}) | gzip" cmd="cat - | dedup | trim_random_nt ${threeprime_random} ${fiveprime_random} | tee >(count_fastq_reads ${1}) | gzip"
echo ${cmd} echo ${cmd}
eval ${cmd} > ${untrimmed_out} || error_exit "${cmd} failed" eval ${cmd} > ${untrimmed_out} || error_exit "${cmd} failed"
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment