From 88c8511a674cacd76d261f2617be0b6f5db30012 Mon Sep 17 00:00:00 2001
From: rlegendr <rachel.legendre@pasteur.fr>
Date: Wed, 17 Feb 2021 14:55:39 +0100
Subject: [PATCH] fix error bowtie2
---
Snakefile | 76 ++++++++++++-------------
config/config.yaml | 2 +-
workflow/rules/bowtie2_mapping.rules | 2 +-
workflow/rules/star_index.rules | 2 +-
workflow/rules/star_mapping.rules | 59 --------------------
workflow/rules/star_mapping_old | 83 ----------------------------
6 files changed, 41 insertions(+), 183 deletions(-)
delete mode 100644 workflow/rules/star_mapping.rules
delete mode 100644 workflow/rules/star_mapping_old
diff --git a/Snakefile b/Snakefile
index 2e6bdf5..53a9f90 100644
--- a/Snakefile
+++ b/Snakefile
@@ -133,6 +133,13 @@ def mapping_index(wildcards):
elif (wildcards.REF == config["genome"]["host_name"]):
input = config["genome"]["host_fasta_file"]
return(input)
+
+def annot_index(wildcards):
+ if (wildcards.REF == config["genome"]["name"]):
+ input = config["genome"]["gff_file"]
+ elif (wildcards.REF == config["genome"]["host_name"]):
+ input = config["genome"]["host_gff_file"]
+ return(input)
@@ -171,22 +178,6 @@ if config["bowtie2_mapping"]["do"]:
include: os.path.join(RULES, "bowtie2_mapping.rules")
-#----------------------------------
-# Double mapping gestion
-#----------------------------------
-
-type = ['init', 'final']
-
-
-def mapping_junction(wildcards):
- if (wildcards.TYPE == 'init' ) :
- {'done' : 'os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" )'.format(REF=wildcards),
- 'index' : 'os.path.join(config["genome"]["genome_directory"],"{REF}")'.format(REF=wildcards) }
- elif (wildcards.TYPE == 'final' ) :
- {'done' : '"02-Mapping/STAR/{REF}/SAindex"'.format(REF=wildcards),
- 'index' : '"02-Mapping/STAR/{REF}"'.format(REF=wildcards) }
- return(input)
-
#----------------------------------
# STAR MAPPING
#----------------------------------
@@ -194,10 +185,10 @@ def mapping_junction(wildcards):
if config["star_mapping"]["do"]:
mapper += ["STAR"]
if config["genome"]["index"]:
- star_index_fasta = adapters_output
- star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" )
+ star_index_fasta = mapping_index
+ star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex")
star_index_output_dir = os.path.join(config["genome"]["genome_directory"],"{REF}")
- star_mapping_splice_file = ""
+ star_mapping_splice_file = annot_index
star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
include: os.path.join(RULES, "star_index.rules")
@@ -206,22 +197,25 @@ if config["star_mapping"]["do"]:
# star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" )
#first pass mapping
- star_mapping_input = adapters_output
- star_mapping_index = unpack(mapping_junction)
- star_mapping_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_{TYPE}.out"
- star_mapping_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}_init"
- star_mapping_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_{TYPE}_SJ.out.tab"
- star_mapping_read_groups = ""
- star_mapping_sort = "02-Mapping/STAR/{SAMPLE}_{REF}_{TYPE}_Aligned.sortedByCoord.out.bam"
- final_output.extend(expand(star_mapping_sort, SAMPLE=samples, REF=ref, TYPE=type))
- include: os.path.join(RULES, "star_mapping.rules")
+ star_mapping_pass1_input = adapters_output
+ star_mapping_pass1_index = star_index_output_done
+ star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out"
+ star_mapping_pass1_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}_init"
+ star_mapping_pass1_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_SJ.out.tab"
+ star_mapping_pass1_read_groups = ""
+ final_output.extend(expand(star_mapping_pass1_junctions, SAMPLE=samples, REF=ref))
+ include: os.path.join(RULES, "star_mapping_pass1.rules")
- star_index_fasta = mapping_index
- star_index_output_done = "02-Mapping/STAR/{REF}/SAindex"
- star_index_output_dir = "02-Mapping/STAR/{REF}"
- star_mapping_splice_file = expand("02-Mapping/STAR/{SAMPLE}_{REF}_init_SJ.out.tab", SAMPLE=samples, REF=ref)
- star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
- final_output.extend(expand(star_index_output_dir, REF=ref))
+ #Second pass mapping
+ star_mapping_pass2_input = adapters_output
+ star_mapping_pass2_index = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex")
+ star_mapping_pass2_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}.out"
+ star_mapping_pass2_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}"
+ star_mapping_pass2_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_SJ.out.tab"
+ star_mapping_pass2_read_groups = ""
+ star_mapping_pass2_sort = "02-Mapping/STAR/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam"
+ final_output.extend(expand(star_mapping_pass2_sort, SAMPLE=samples, REF=ref))
+ include: os.path.join(RULES, "star_mapping_pass2.rules")
@@ -229,12 +223,18 @@ if config["star_mapping"]["do"]:
#----------------------------------
# counting step
#----------------------------------
+
+def counting_index(wildcards):
+ if (wildcards.MAP == "bowtie2"):
+ input = "02-Mapping/{MAP}/{REF}/{SAMPLE}_{REF}_sort.bam"
+ elif (wildcards.MAP == "STAR"):
+ input = "02-Mapping/{MAP}/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam"
+ return(input)
+
+
if config["feature_counts"]["do"]:
- if config["bowtie2_mapping"]["do"]:
- feature_counts_input = "02-Mapping/{MAP}/{REF}/{SAMPLE}_{REF}_sort.bam"
- elif config["star_mapping"]["do"]:
- feature_counts_input = "02-Mapping/{MAP}/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam"
+ feature_counts_input = counting_index
feature_counts_output_count = "03-Counting/{MAP}/{SAMPLE}_{REF}_feature.out"
feature_counts_gff = config['genome']['gff_file']
feature_counts_logs_err = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.err"
diff --git a/config/config.yaml b/config/config.yaml
index 5e0fe7c..8d6e1c5 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -132,7 +132,7 @@ bowtie2_mapping:
star_mapping:
- do: no
+ do: yes
options: "--outFilterMismatchNoverLmax 0.05 --outSAMunmapped Within "
threads: 4
diff --git a/workflow/rules/bowtie2_mapping.rules b/workflow/rules/bowtie2_mapping.rules
index acf8f4f..7e3cb31 100755
--- a/workflow/rules/bowtie2_mapping.rules
+++ b/workflow/rules/bowtie2_mapping.rules
@@ -67,7 +67,7 @@ rule bowtie2_mapping:
cmd="(${{cmd}}) > {log.out} 2> {log.err}"
# sort result
- cmd+=" && samtools sort -o {output.bam} {params.prefix} > {output.sort} "
+ cmd+=" && samtools sort -o {output.sort} {output.bam} "
cmd+=" && samtools index {output.sort}"
#run command
diff --git a/workflow/rules/star_index.rules b/workflow/rules/star_index.rules
index 118c778..cc7186c 100644
--- a/workflow/rules/star_index.rules
+++ b/workflow/rules/star_index.rules
@@ -44,7 +44,7 @@ rule star_index:
"""
if [[ -s {params.splice_file} ]]
then
- STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbFileChrStartEnd {params.splice_file}
+ STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {params.splice_file}
else
STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads}
fi
diff --git a/workflow/rules/star_mapping.rules b/workflow/rules/star_mapping.rules
deleted file mode 100644
index 5834fac..0000000
--- a/workflow/rules/star_mapping.rules
+++ /dev/null
@@ -1,59 +0,0 @@
-#########################################################################
-# RNAflow: an automated pipeline to analyse transcriptomic data #
-# #
-# Authors: Rachel Legendre #
-# Copyright (c) 2021-2022 Institut Pasteur (Paris). #
-# #
-# This file is part of RNAflow workflow. #
-# #
-# RNAflow is free software: you can redistribute it and/or modify #
-# it under the terms of the GNU General Public License as published by #
-# the Free Software Foundation, either version 3 of the License, or #
-# (at your option) any later version. #
-# #
-# RNAflow is distributed in the hope that it will be useful, #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
-# GNU General Public License for more details . #
-# #
-# You should have received a copy of the GNU General Public License #
-# along with RNAflow (LICENSE). #
-# If not, see <https://www.gnu.org/licenses/>. #
-#########################################################################
-
-
-rule star_mapping:
- input:
- fastq = star_mapping_input,
- star_mapping_index
- log:
- star_mapping_logs
- output:
- star_mapping_sort
- params:
- prefix = temp(star_mapping_output_prefix),
- junctions = star_mapping_junctions,
- #read_groups = star_mapping_read_groups,
- kwargs = config['star_mapping']['options']
- singularity:
- "rnaflow.img"
- threads:
- config['star_mapping']['threads']
- envmodules:
- "STAR",
- "samtools"
- shell:
- """
- STAR --genomeDir {input.star_mapping_index.index} \
- --readFilesIn {input.fastq} \
- --runThreadN {threads} \
- --genomeLoad NoSharedMemory \
- --outSAMtype BAM SortedByCoordinate \
- --readFilesCommand zcat \
- --seedSearchStartLmax 20 \
- --outFileNamePrefix {params.prefix} \
- {params.kwargs} 2> {log}
-
-
- samtools index {params.prefix}Aligned.sortedByCoord.out.bam 2>> {log}
- """
diff --git a/workflow/rules/star_mapping_old b/workflow/rules/star_mapping_old
deleted file mode 100644
index 7c555dd..0000000
--- a/workflow/rules/star_mapping_old
+++ /dev/null
@@ -1,83 +0,0 @@
-#########################################################################
-# RNAflow: an automated pipeline to analyse transcriptomic data #
-# #
-# Authors: Rachel Legendre #
-# Copyright (c) 2021-2022 Institut Pasteur (Paris). #
-# #
-# This file is part of RNAflow workflow. #
-# #
-# RNAflow is free software: you can redistribute it and/or modify #
-# it under the terms of the GNU General Public License as published by #
-# the Free Software Foundation, either version 3 of the License, or #
-# (at your option) any later version. #
-# #
-# RNAflow is distributed in the hope that it will be useful, #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
-# GNU General Public License for more details . #
-# #
-# You should have received a copy of the GNU General Public License #
-# along with RNAflow (LICENSE). #
-# If not, see <https://www.gnu.org/licenses/>. #
-#########################################################################
-
-
-rule star_mapping:
-
- input:
- fastq = __star_mapping__input,
- index = __star_mapping__index_done
- log:
- __star_mapping__logs
- output:
- __star_mapping__sort
- params:
- prefix1 = temp(__star_mapping__output_prefix1),
- prefix2 = __star_mapping__output_prefix2,
- index = __star_mapping__index,
- ref = __star_mapping__ref,
- #read_groups = __star_mapping__read_groups,
- genome_dir = temp(__star_mapping__genome_dir),
- splice_file = temp(__star_mapping__splice_file),
- kwargs = config['star_mapping']['options']
- threads:
- config['star_mapping']['threads']
- #Resources:
- # ram = config['star_mapping']['ram']
- run:
- shell(""" echo "Run rna-star 1st pass"
- STAR --genomeDir {params.index} \
- --readFilesIn {input.fastq} \
- --runThreadN {threads} \
- --genomeLoad NoSharedMemory \
- --outSAMtype BAM SortedByCoordinate \
- --readFilesCommand zcat \
- --seedSearchStartLmax 20 \
- --outFileNamePrefix {params.prefix1} \
- {params.kwargs} 2> {log}""")
-
- shell(""" echo "run rna-star genome indexing"
- if [ ! -d "{params.genome_dir}" ]; then
- mkdir {params.genome_dir}
- fi
- STAR --runMode genomeGenerate \
- --genomeDir {params.genome_dir} \
- --genomeFastaFiles {params.ref} \
- --sjdbFileChrStartEnd {params.splice_file} \
- --sjdbOverhang 100 \
- --runThreadN {threads} 2>> {log}""")
-
-
- shell(""" echo "Run rna-star 2nd pass"
- STAR --genomeDir {params.genome_dir} \
- --readFilesIn {input.fastq} \
- --runThreadN {threads} \
- --genomeLoad NoSharedMemory \
- --sjdbFileChrStartEnd {params.splice_file} \
- --outSAMtype BAM SortedByCoordinate \
- --readFilesCommand zcat \
- --outFileNamePrefix {params.prefix2} \
- {params.kwargs} 2>> {log}""")
-
-
- shell(""" samtools index {params.prefix2}Aligned.sortedByCoord.out.bam 2>> {log} """)
--
GitLab