From 23a2c768541a40ee6e47f25b12b1b458e372a135 Mon Sep 17 00:00:00 2001
From: rlegendr <rachel.legendre@pasteur.fr>
Date: Fri, 17 Sep 2021 10:36:25 +0200
Subject: [PATCH] gestion of gz

---
Review notes (this section is ignored by `git am`):
 * The gz test in both STAR rules now reads `[[ "{input.fastq}" == *.gz ]]`.
   Inside `[[ ]]` a quoted right-hand side ("*.gz") is compared literally,
   so the zcat branch could never be selected; the pattern must be unquoted.
   The left-hand side is quoted so that an input expanding to several
   space-separated files stays a single word instead of being a syntax error.
 * NOTE(review): line breaks and indentation were reconstructed from a
   whitespace-collapsed copy of this patch; hunk line counts match the
   headers, but confirm the exact whitespace against the repository
   (or apply with `git apply --ignore-whitespace`).

 Snakefile                               | 10 +++----
 workflow/rules/sortmerna.rules          |  6 ++---
 workflow/rules/star_mapping_pass1.rules | 30 ++++++++++++++-------
 workflow/rules/star_mapping_pass2.rules | 36 +++++++++++++++++--------
 4 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/Snakefile b/Snakefile
index a7c3018..664c5cb 100755
--- a/Snakefile
+++ b/Snakefile
@@ -212,8 +212,8 @@ if config["star_mapping"]["do"]:
     star_index_fasta = unpack(mapping_index)
     star_mapping_splice_file = unpack(annot_index)
     star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
-    star_index_output_done = config["genome"]["genome_directory"]+"{REF}/STAR/SAindex"
-    star_index_output_dir = config["genome"]["genome_directory"]+"{REF}/STAR/"
+    star_index_output_done = config["genome"]["genome_directory"]+"/{REF}/STAR/SAindex"
+    star_index_output_dir = config["genome"]["genome_directory"]+"/{REF}/STAR/"
 
     include: os.path.join(RULES, "star_index.rules")
 
@@ -223,7 +223,7 @@ if config["star_mapping"]["do"]:
     star_mapping_pass1_done = star_index_output_done
     star_mapping_pass1_index = star_index_output_dir
     star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out"
-    star_mapping_pass1_output_prefix = "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_init_"
+    star_mapping_pass1_output_prefix = "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_"
     star_mapping_pass1_junctions = "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_SJ.out.tab"
     star_mapping_pass1_bam = "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_Aligned.sortedByCoord.out.bam"
     star_mapping_pass1_read_groups = ""
@@ -291,8 +291,8 @@ if config["feature_counts"]["do"]:
 #----------------------------------
 
 flagstat_input = counting_index
-flagstat_logs = "02-Mapping/Flagstats/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
-flagstat_output = "02-Mapping/Flagstats/{REF}/{MAP}/{SAMPLE}_{REF}_stats.out"
+flagstat_logs = "02-Mapping/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
+flagstat_output = "02-Mapping/{REF}/{MAP}/{SAMPLE}_{REF}_stats.out"
 final_output.extend(expand(flagstat_output, SAMPLE=samples, REF=ref, MAP=mapper))
 include: os.path.join(RULES, "flagstat.rules")
 
diff --git a/workflow/rules/sortmerna.rules b/workflow/rules/sortmerna.rules
index c176c46..27795ce 100755
--- a/workflow/rules/sortmerna.rules
+++ b/workflow/rules/sortmerna.rules
@@ -44,8 +44,8 @@ rule sortmerna:
         set +o pipefail
         #tmp="{input.fastq}"
         #infiles=($tmp)
-        fasta={input.fasta}
-        index=${{fasta%%.fa}}
+        fasta="{input.fasta}"
+        index=${{fasta%.fa}}
 
         if [[ ! -s ${{index}}.stats ]]
         then
@@ -53,7 +53,7 @@ rule sortmerna:
         fi
 
 
-        sortmerna --ref $${{fasta}},${{index}} -a {threads} --reads {input.fastq} --aligned outfile_rRNA --fastx --sam --num_alignments 1 --other outfile_noRNA --log -v > {log.out} 2> {log.err}
+        sortmerna --ref ${{fasta}},${{index}} -a {threads} --reads {input.fastq} --aligned outfile_rRNA --fastx --sam --num_alignments 1 --other outfile_noRNA --log -v > {log.out} 2> {log.err}
 
         pigz -fc outfile_rRNA > {output.rRNA}
 
diff --git a/workflow/rules/star_mapping_pass1.rules b/workflow/rules/star_mapping_pass1.rules
index c1df88d..516ead4 100755
--- a/workflow/rules/star_mapping_pass1.rules
+++ b/workflow/rules/star_mapping_pass1.rules
@@ -45,15 +45,27 @@ rule star_mapping_pass1:
         star_mapping_pass1_logs
     shell:
         """
-        STAR --genomeDir {params.index} \
-             --readFilesIn {input.fastq} \
-             --runThreadN {threads} \
-             --genomeLoad NoSharedMemory \
-             --outSAMtype BAM SortedByCoordinate \
-             --readFilesCommand zcat \
-             --seedSearchStartLmax 20 \
-             --outFileNamePrefix {params.prefix} \
-             {params.kwargs} 2> {log}
+        if [[ "{input.fastq}" == *.gz ]]
+        then
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq} \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --readFilesCommand zcat \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 {params.kwargs} 2> {log}
+        else
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq} \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 {params.kwargs} 2> {log}
+        fi
 
         samtools index {params.prefix}Aligned.sortedByCoord.out.bam 2>> {log}
 
diff --git a/workflow/rules/star_mapping_pass2.rules b/workflow/rules/star_mapping_pass2.rules
index a792f70..daae75f 100755
--- a/workflow/rules/star_mapping_pass2.rules
+++ b/workflow/rules/star_mapping_pass2.rules
@@ -46,17 +46,31 @@ rule star_mapping_pass2:
         star_mapping_pass2_logs
     shell:
         """
-        STAR --genomeDir {params.index} \
-             --readFilesIn {input.fastq} \
-             --runThreadN {threads} \
-             --genomeLoad NoSharedMemory \
-             --outSAMtype BAM SortedByCoordinate \
-             --readFilesCommand zcat \
-             --seedSearchStartLmax 20 \
-             --outFileNamePrefix {params.prefix} \
-             --outSAMattrRGline {params.RG} \
-             --sjdbFileChrStartEnd {input.sjdb} \
-             {params.kwargs} 2> {log}
+        if [[ "{input.fastq}" == *.gz ]]
+        then
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq} \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --readFilesCommand zcat \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 --outSAMattrRGline {params.RG} \
+                 --sjdbFileChrStartEnd {input.sjdb} \
+                 {params.kwargs} 2> {log}
+        else
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq} \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 --outSAMattrRGline {params.RG} \
+                 --sjdbFileChrStartEnd {input.sjdb} \
+                 {params.kwargs} 2> {log}
+        fi
 
         samtools index {params.prefix}Aligned.sortedByCoord.out.bam 2>> {log}
 
-- 
GitLab