diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile index 2b83018ec473822d6e5536634e61f57b4ac96467..db224ae69bad78b0feb351b0d1b7a9dd36e2571f 100644 --- a/small_RNA-seq/small_RNA-seq.snakefile +++ b/small_RNA-seq/small_RNA-seq.snakefile @@ -944,8 +944,7 @@ rule all: #include: relative_include_path include: ensure_relative(irules["link_raw_data"], workflow.basedir) -if int(trim3) + int(trim5) > 0: - # Extra step needed: deduplication and removal of UMIs +if int(trim3) + int(trim5) == 0 or config.get("nodedup", False): rule trim_and_dedup: input: rules.link_raw_data.output, @@ -963,7 +962,7 @@ if int(trim3) + int(trim5) > 0: #resources: # mem_mb=1049300 message: - "Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}." + "Trimming adaptor from raw data, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep} (no deduplication)." benchmark: OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt") log: @@ -975,19 +974,21 @@ if int(trim3) + int(trim5) > 0: | tee >(count_fastq_reads {output.nb_raw}) \\ | cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\ | tee >(count_fastq_reads {output.nb_trimmed}) \\ - | dedup \\ - | tee >(count_fastq_reads {output.nb_deduped}) \\ | trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\ | gzip > {output.trimmed} \\ 2> {log.trim_and_dedup} + cp {output.nb_trimmed} {output.nb_deduped} """ else: + # Extra step needed: deduplication before removal of UMIs rule trim_and_dedup: input: rules.link_raw_data.output, #OPJ(data_dir, "{lib}_{rep}.fastq.gz"), params: adapter = lambda wildcards: lib2adapt[wildcards.lib], + trim5 = trim5, + trim3 = trim3, output: trimmed = OPJ(data_dir, "trimmed", "{lib}_{rep}_trimmed.fastq.gz"), nb_raw = OPJ(data_dir, "trimmed", "{lib}_{rep}_nb_raw.txt"), @@ -997,7 +998,7 @@ else: #resources: # mem_mb=1049300 message: - "Trimming adaptor from raw data for {wildcards.lib}_{wildcards.rep} (no deduplication)." + "Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}." benchmark: OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt") log: @@ -1009,9 +1010,11 @@ else: | tee >(count_fastq_reads {output.nb_raw}) \\ | cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\ | tee >(count_fastq_reads {output.nb_trimmed}) \\ + | dedup \\ + | tee >(count_fastq_reads {output.nb_deduped}) \\ + | trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\ | gzip > {output.trimmed} \\ 2> {log.trim_and_dedup} - cp {output.nb_trimmed} {output.nb_deduped} """