diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile
index 2b83018ec473822d6e5536634e61f57b4ac96467..db224ae69bad78b0feb351b0d1b7a9dd36e2571f 100644
--- a/small_RNA-seq/small_RNA-seq.snakefile
+++ b/small_RNA-seq/small_RNA-seq.snakefile
@@ -944,8 +944,7 @@ rule all:
#include: relative_include_path
include: ensure_relative(irules["link_raw_data"], workflow.basedir)
-if int(trim3) + int(trim5) > 0:
- # Extra step needed: deduplication and removal of UMIs
+if int(trim3) + int(trim5) == 0 or config.get("nodedup", False):
rule trim_and_dedup:
input:
rules.link_raw_data.output,
@@ -963,7 +962,7 @@ if int(trim3) + int(trim5) > 0:
#resources:
# mem_mb=1049300
message:
- "Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}."
+ "Trimming adaptor from raw data, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep} (no deduplication)."
benchmark:
OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt")
log:
@@ -975,19 +974,21 @@ if int(trim3) + int(trim5) > 0:
| tee >(count_fastq_reads {output.nb_raw}) \\
| cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\
| tee >(count_fastq_reads {output.nb_trimmed}) \\
- | dedup \\
- | tee >(count_fastq_reads {output.nb_deduped}) \\
| trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\
| gzip > {output.trimmed} \\
2> {log.trim_and_dedup}
+ cp {output.nb_trimmed} {output.nb_deduped}
"""
else:
+ # Random 5'/3' nucleotides (UMIs) are present and deduplication is not disabled: deduplicate reads before the UMIs are trimmed off
rule trim_and_dedup:
input:
rules.link_raw_data.output,
#OPJ(data_dir, "{lib}_{rep}.fastq.gz"),
params:
adapter = lambda wildcards: lib2adapt[wildcards.lib],
+ trim5 = trim5,
+ trim3 = trim3,
output:
trimmed = OPJ(data_dir, "trimmed", "{lib}_{rep}_trimmed.fastq.gz"),
nb_raw = OPJ(data_dir, "trimmed", "{lib}_{rep}_nb_raw.txt"),
@@ -997,7 +998,7 @@ else:
#resources:
# mem_mb=1049300
message:
- "Trimming adaptor from raw data for {wildcards.lib}_{wildcards.rep} (no deduplication)."
+ "Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}."
benchmark:
OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt")
log:
@@ -1009,9 +1010,11 @@ else:
| tee >(count_fastq_reads {output.nb_raw}) \\
| cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\
| tee >(count_fastq_reads {output.nb_trimmed}) \\
+ | dedup \\
+ | tee >(count_fastq_reads {output.nb_deduped}) \\
+ | trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\
| gzip > {output.trimmed} \\
2> {log.trim_and_dedup}
- cp {output.nb_trimmed} {output.nb_deduped}
"""