From 23a2c768541a40ee6e47f25b12b1b458e372a135 Mon Sep 17 00:00:00 2001
From: rlegendr <rachel.legendre@pasteur.fr>
Date: Fri, 17 Sep 2021 10:36:25 +0200
Subject: [PATCH] handling of gz input files

---
 Snakefile                               | 10 +++----
 workflow/rules/sortmerna.rules          |  6 ++---
 workflow/rules/star_mapping_pass1.rules | 30 ++++++++++++++-------
 workflow/rules/star_mapping_pass2.rules | 36 +++++++++++++++++--------
 4 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/Snakefile b/Snakefile
index a7c3018..664c5cb 100755
--- a/Snakefile
+++ b/Snakefile
@@ -212,8 +212,8 @@ if config["star_mapping"]["do"]:
     star_index_fasta = unpack(mapping_index)
     star_mapping_splice_file = unpack(annot_index)
     star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
-    star_index_output_done = config["genome"]["genome_directory"]+"{REF}/STAR/SAindex"
-    star_index_output_dir = config["genome"]["genome_directory"]+"{REF}/STAR/" 
+    star_index_output_done = os.path.join(config["genome"]["genome_directory"], "{REF}", "STAR", "SAindex")  # join tolerates a trailing "/" in the config value
+    star_index_output_dir = os.path.join(config["genome"]["genome_directory"], "{REF}", "STAR", "")  # empty last component keeps the trailing "/"
  
     include: os.path.join(RULES, "star_index.rules")
 
@@ -223,7 +223,7 @@ if config["star_mapping"]["do"]:
     star_mapping_pass1_done = star_index_output_done
     star_mapping_pass1_index = star_index_output_dir
     star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out"
-    star_mapping_pass1_output_prefix =  "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_init_"
+    star_mapping_pass1_output_prefix =  "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_"
     star_mapping_pass1_junctions = "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_SJ.out.tab"
     star_mapping_pass1_bam =  "02-Mapping/{REF}/STAR/{SAMPLE}_{REF}_init_Aligned.sortedByCoord.out.bam"
     star_mapping_pass1_read_groups = ""
@@ -291,8 +291,8 @@ if config["feature_counts"]["do"]:
 #----------------------------------
 
 flagstat_input = counting_index
-flagstat_logs = "02-Mapping/Flagstats/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
-flagstat_output = "02-Mapping/Flagstats/{REF}/{MAP}/{SAMPLE}_{REF}_stats.out"
+flagstat_logs = "02-Mapping/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
+flagstat_output = "02-Mapping/{REF}/{MAP}/{SAMPLE}_{REF}_stats.out"
 final_output.extend(expand(flagstat_output, SAMPLE=samples, REF=ref, MAP=mapper))    
 include: os.path.join(RULES, "flagstat.rules")  
 
diff --git a/workflow/rules/sortmerna.rules b/workflow/rules/sortmerna.rules
index c176c46..27795ce 100755
--- a/workflow/rules/sortmerna.rules
+++ b/workflow/rules/sortmerna.rules
@@ -44,8 +44,8 @@ rule sortmerna:
         set +o pipefail
 	    #tmp="{input.fastq}"
         #infiles=($tmp)
-        fasta={input.fasta}
-        index=${{fasta%%.fa}}
+        fasta="{input.fasta}"
+        index=${{fasta%.fa}}
 
         if [[ ! -s ${{index}}.stats ]]
         then
@@ -53,7 +53,7 @@ rule sortmerna:
         fi
 
 
-        sortmerna --ref $${{fasta}},${{index}} -a {threads} --reads {input.fastq} --aligned outfile_rRNA --fastx  --sam --num_alignments 1 --other outfile_noRNA --log -v > {log.out} 2> {log.err}
+        sortmerna --ref ${{fasta}},${{index}} -a {threads} --reads {input.fastq} --aligned outfile_rRNA --fastx  --sam --num_alignments 1 --other outfile_noRNA --log -v > {log.out} 2> {log.err}
 
 
         pigz -fc outfile_rRNA > {output.rRNA}
diff --git a/workflow/rules/star_mapping_pass1.rules b/workflow/rules/star_mapping_pass1.rules
index c1df88d..516ead4 100755
--- a/workflow/rules/star_mapping_pass1.rules
+++ b/workflow/rules/star_mapping_pass1.rules
@@ -45,15 +45,27 @@ rule star_mapping_pass1:
         star_mapping_pass1_logs
     shell:
         """ 
-        STAR --genomeDir {params.index} \
-             --readFilesIn {input.fastq}  \
-             --runThreadN {threads} \
-             --genomeLoad NoSharedMemory \
-             --outSAMtype BAM SortedByCoordinate \
-             --readFilesCommand zcat \
-             --seedSearchStartLmax 20 \
-             --outFileNamePrefix {params.prefix} \
-            {params.kwargs}  2> {log}
+        if [[ "{input.fastq}" == *.gz ]]  # glob must stay unquoted to act as a pattern; quoted input keeps paired files as one word
+        then
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq}  \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --readFilesCommand zcat \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                {params.kwargs}  2> {log}
+        else
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq}  \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                {params.kwargs}  2> {log}
+        fi
 
 
         samtools index {params.prefix}Aligned.sortedByCoord.out.bam  2>> {log} 
diff --git a/workflow/rules/star_mapping_pass2.rules b/workflow/rules/star_mapping_pass2.rules
index a792f70..daae75f 100755
--- a/workflow/rules/star_mapping_pass2.rules
+++ b/workflow/rules/star_mapping_pass2.rules
@@ -46,17 +46,31 @@ rule star_mapping_pass2:
         star_mapping_pass2_logs
     shell:
         """ 
-        STAR --genomeDir {params.index} \
-             --readFilesIn {input.fastq}  \
-             --runThreadN {threads} \
-             --genomeLoad NoSharedMemory \
-             --outSAMtype BAM SortedByCoordinate \
-             --readFilesCommand zcat \
-             --seedSearchStartLmax 20 \
-             --outFileNamePrefix {params.prefix} \
-             --outSAMattrRGline {params.RG} \
-             --sjdbFileChrStartEnd {input.sjdb} \
-            {params.kwargs}  2> {log}
+        if [[ "{input.fastq}" == *.gz ]]  # glob must stay unquoted to act as a pattern; quoted input keeps paired files as one word
+        then
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq}  \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --readFilesCommand zcat \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 --outSAMattrRGline {params.RG} \
+                 --sjdbFileChrStartEnd {input.sjdb} \
+                {params.kwargs}  2> {log}
+        else
+            STAR --genomeDir {params.index} \
+                 --readFilesIn {input.fastq}  \
+                 --runThreadN {threads} \
+                 --genomeLoad NoSharedMemory \
+                 --outSAMtype BAM SortedByCoordinate \
+                 --seedSearchStartLmax 20 \
+                 --outFileNamePrefix {params.prefix} \
+                 --outSAMattrRGline {params.RG} \
+                 --sjdbFileChrStartEnd {input.sjdb} \
+                {params.kwargs}  2> {log}
+        fi
 
 
         samtools index {params.prefix}Aligned.sortedByCoord.out.bam  2>> {log}  
-- 
GitLab