Commit e7497452 authored by Blaise Li's avatar Blaise Li
Browse files

Make deduplication optional in sRNA-seq.

Deduplication is skipped when the "nodedup" switch is set in the pipeline config.
parent 579fae03
......@@ -944,8 +944,7 @@ rule all:
#include: relative_include_path
include: ensure_relative(irules["link_raw_data"], workflow.basedir)
if int(trim3) + int(trim5) > 0:
# Extra step needed: deduplication and removal of UMIs
if int(trim3) + int(trim5) == 0 or config.get("nodedup", False):
rule trim_and_dedup:
input:
rules.link_raw_data.output,
......@@ -963,7 +962,7 @@ if int(trim3) + int(trim5) > 0:
#resources:
# mem_mb=1049300
message:
"Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}."
"Trimming adaptor from raw data, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep} (no deduplication)."
benchmark:
OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt")
log:
......@@ -975,19 +974,21 @@ if int(trim3) + int(trim5) > 0:
| tee >(count_fastq_reads {output.nb_raw}) \\
| cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\
| tee >(count_fastq_reads {output.nb_trimmed}) \\
| dedup \\
| tee >(count_fastq_reads {output.nb_deduped}) \\
| trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\
| gzip > {output.trimmed} \\
2> {log.trim_and_dedup}
cp {output.nb_trimmed} {output.nb_deduped}
"""
else:
# Extra step needed: deduplication before removal of UMIs
rule trim_and_dedup:
input:
rules.link_raw_data.output,
#OPJ(data_dir, "{lib}_{rep}.fastq.gz"),
params:
adapter = lambda wildcards: lib2adapt[wildcards.lib],
trim5 = trim5,
trim3 = trim3,
output:
trimmed = OPJ(data_dir, "trimmed", "{lib}_{rep}_trimmed.fastq.gz"),
nb_raw = OPJ(data_dir, "trimmed", "{lib}_{rep}_nb_raw.txt"),
......@@ -997,7 +998,7 @@ else:
#resources:
# mem_mb=1049300
message:
"Trimming adaptor from raw data for {wildcards.lib}_{wildcards.rep} (no deduplication)."
"Trimming adaptor from raw data, deduplicating reads, removing random 5' {trim5}-mers and 3' {trim3}-mers for {wildcards.lib}_{wildcards.rep}."
benchmark:
OPJ(log_dir, "trim_and_dedup", "{lib}_{rep}_benchmark.txt")
log:
......@@ -1009,9 +1010,11 @@ else:
| tee >(count_fastq_reads {output.nb_raw}) \\
| cutadapt -a {params.adapter} --discard-untrimmed - 2> {log.cutadapt} \\
| tee >(count_fastq_reads {output.nb_trimmed}) \\
| dedup \\
| tee >(count_fastq_reads {output.nb_deduped}) \\
| trim_random_nt {params.trim5} {params.trim3} 2>> {log.cutadapt} \\
| gzip > {output.trimmed} \\
2> {log.trim_and_dedup}
cp {output.nb_trimmed} {output.nb_deduped}
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment