diff --git a/Snakefile b/Snakefile index 2e6bdf5da197edcd07da4acc6109527d20312df3..53a9f90af5990bf81d959012a03540de2d09021a 100644 --- a/Snakefile +++ b/Snakefile @@ -133,6 +133,13 @@ def mapping_index(wildcards): elif (wildcards.REF == config["genome"]["host_name"]): input = config["genome"]["host_fasta_file"] return(input) + +def annot_index(wildcards): + if (wildcards.REF == config["genome"]["name"]): + input = config["genome"]["gff_file"] + elif (wildcards.REF == config["genome"]["host_name"]): + input = config["genome"]["host_gff_file"] + return(input) @@ -171,22 +178,6 @@ if config["bowtie2_mapping"]["do"]: include: os.path.join(RULES, "bowtie2_mapping.rules") -#---------------------------------- -# Double mapping gestion -#---------------------------------- - -type = ['init', 'final'] - - -def mapping_junction(wildcards): - if (wildcards.TYPE == 'init' ) : - {'done' : 'os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" )'.format(REF=wildcards), - 'index' : 'os.path.join(config["genome"]["genome_directory"],"{REF}")'.format(REF=wildcards) } - elif (wildcards.TYPE == 'final' ) : - {'done' : '"02-Mapping/STAR/{REF}/SAindex"'.format(REF=wildcards), - 'index' : '"02-Mapping/STAR/{REF}"'.format(REF=wildcards) } - return(input) - #---------------------------------- # STAR MAPPING #---------------------------------- @@ -194,10 +185,10 @@ def mapping_junction(wildcards): if config["star_mapping"]["do"]: mapper += ["STAR"] if config["genome"]["index"]: - star_index_fasta = adapters_output - star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" ) + star_index_fasta = mapping_index + star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex") star_index_output_dir = os.path.join(config["genome"]["genome_directory"],"{REF}") - star_mapping_splice_file = "" + star_mapping_splice_file = annot_index star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log" include: os.path.join(RULES, "star_index.rules") @@ -206,22 +197,25 @@ if config["star_mapping"]["do"]: # star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex" ) #first pass mapping - star_mapping_input = adapters_output - star_mapping_index = unpack(mapping_junction) - star_mapping_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_{TYPE}.out" - star_mapping_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}_init" - star_mapping_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_{TYPE}_SJ.out.tab" - star_mapping_read_groups = "" - star_mapping_sort = "02-Mapping/STAR/{SAMPLE}_{REF}_{TYPE}_Aligned.sortedByCoord.out.bam" - final_output.extend(expand(star_mapping_sort, SAMPLE=samples, REF=ref, TYPE=type)) - include: os.path.join(RULES, "star_mapping.rules") + star_mapping_pass1_input = adapters_output + star_mapping_pass1_index = star_index_output_done + star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out" + star_mapping_pass1_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}_init" + star_mapping_pass1_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_SJ.out.tab" + star_mapping_pass1_read_groups = "" + final_output.extend(expand(star_mapping_pass1_junctions, SAMPLE=samples, REF=ref)) + include: os.path.join(RULES, "star_mapping_pass1.rules") - star_index_fasta = mapping_index - star_index_output_done = "02-Mapping/STAR/{REF}/SAindex" - star_index_output_dir = "02-Mapping/STAR/{REF}" - star_mapping_splice_file = expand("02-Mapping/STAR/{SAMPLE}_{REF}_init_SJ.out.tab", SAMPLE=samples, REF=ref) - star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log" - final_output.extend(expand(star_index_output_dir, REF=ref)) + #Second pass mapping + star_mapping_pass2_input = adapters_output + star_mapping_pass2_index = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex") + star_mapping_pass2_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}.out" + star_mapping_pass2_output_prefix = "02-Mapping/STAR/{SAMPLE}_{REF}" + star_mapping_pass2_junctions = "02-Mapping/STAR/{SAMPLE}_{REF}_SJ.out.tab" + star_mapping_pass2_read_groups = "" + star_mapping_pass2_sort = "02-Mapping/STAR/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam" + final_output.extend(expand(star_mapping_pass2_sort, SAMPLE=samples, REF=ref)) + include: os.path.join(RULES, "star_mapping_pass2.rules") @@ -229,12 +223,18 @@ if config["star_mapping"]["do"]: #---------------------------------- # counting step #---------------------------------- + +def counting_index(wildcards): + if (wildcards.MAP == "bowtie2"): + input = "02-Mapping/{MAP}/{REF}/{SAMPLE}_{REF}_sort.bam" + elif (wildcards.MAP == "STAR"): + input = "02-Mapping/{MAP}/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam" + return(input) + + if config["feature_counts"]["do"]: - if config["bowtie2_mapping"]["do"]: - feature_counts_input = "02-Mapping/{MAP}/{REF}/{SAMPLE}_{REF}_sort.bam" - elif config["star_mapping"]["do"]: - feature_counts_input = "02-Mapping/{MAP}/{SAMPLE}_{REF}_Aligned.sortedByCoord.out.bam" + feature_counts_input = counting_index feature_counts_output_count = "03-Counting/{MAP}/{SAMPLE}_{REF}_feature.out" feature_counts_gff = config['genome']['gff_file'] feature_counts_logs_err = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.err" diff --git a/config/config.yaml b/config/config.yaml index 5e0fe7ca12dd29f13d1c91305473b814fe9cdd59..8d6e1c5325e627127efd1a0911970db7cd329ad9 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -132,7 +132,7 @@ bowtie2_mapping: star_mapping: - do: no + do: yes options: "--outFilterMismatchNoverLmax 0.05 --outSAMunmapped Within " threads: 4 diff --git a/workflow/rules/bowtie2_mapping.rules b/workflow/rules/bowtie2_mapping.rules index acf8f4f17a328e8362f168a1da58da2b2469e9b3..7e3cb31acf054e02c6af959ab61a8e69065d9c1e 100755 --- a/workflow/rules/bowtie2_mapping.rules +++ b/workflow/rules/bowtie2_mapping.rules @@ -67,7 +67,7 @@ rule bowtie2_mapping: cmd="(${{cmd}}) > {log.out} 2> {log.err}" # sort result - cmd+=" && samtools sort -o {output.bam} {params.prefix} > {output.sort} " + cmd+=" && samtools sort -o {output.sort} {output.bam} " cmd+=" && samtools index {output.sort}" #run command diff --git a/workflow/rules/star_index.rules b/workflow/rules/star_index.rules index 118c778642b1cfd0baacd010436ce61daf87fdfe..cc7186c520c30251575a5af323775a5d4fcf14d8 100644 --- a/workflow/rules/star_index.rules +++ b/workflow/rules/star_index.rules @@ -44,7 +44,7 @@ rule star_index: """ if [[ -s {params.splice_file} ]] then - STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbFileChrStartEnd {params.splice_file} + STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {params.splice_file} else STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} fi diff --git a/workflow/rules/star_mapping.rules b/workflow/rules/star_mapping.rules deleted file mode 100644 index 5834fac49887ed7e55447c71b93183155ef4bf4a..0000000000000000000000000000000000000000 --- a/workflow/rules/star_mapping.rules +++ /dev/null @@ -1,59 +0,0 @@ -######################################################################### -# RNAflow: an automated pipeline to analyse transcriptomic data # -# # -# Authors: Rachel Legendre # -# Copyright (c) 2021-2022 Institut Pasteur (Paris). # -# # -# This file is part of RNAflow workflow. # -# # -# RNAflow is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# RNAflow is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details . # -# # -# You should have received a copy of the GNU General Public License # -# along with RNAflow (LICENSE). # -# If not, see <https://www.gnu.org/licenses/>. # -######################################################################### - - -rule star_mapping: - input: - fastq = star_mapping_input, - star_mapping_index - log: - star_mapping_logs - output: - star_mapping_sort - params: - prefix = temp(star_mapping_output_prefix), - junctions = star_mapping_junctions, - #read_groups = star_mapping_read_groups, - kwargs = config['star_mapping']['options'] - singularity: - "rnaflow.img" - threads: - config['star_mapping']['threads'] - envmodules: - "STAR", - "samtools" - shell: - """ - STAR --genomeDir {input.star_mapping_index.index} \ - --readFilesIn {input.fastq} \ - --runThreadN {threads} \ - --genomeLoad NoSharedMemory \ - --outSAMtype BAM SortedByCoordinate \ - --readFilesCommand zcat \ - --seedSearchStartLmax 20 \ - --outFileNamePrefix {params.prefix} \ - {params.kwargs} 2> {log} - - - samtools index {params.prefix}Aligned.sortedByCoord.out.bam 2>> {log} - """ diff --git a/workflow/rules/star_mapping_old b/workflow/rules/star_mapping_old deleted file mode 100644 index 7c555dddaf80b48e7b900283a4772c7baa48540f..0000000000000000000000000000000000000000 --- a/workflow/rules/star_mapping_old +++ /dev/null @@ -1,83 +0,0 @@ -######################################################################### -# RNAflow: an automated pipeline to analyse transcriptomic data # -# # -# Authors: Rachel Legendre # -# Copyright (c) 2021-2022 Institut Pasteur (Paris). # -# # -# This file is part of RNAflow workflow. # -# # -# RNAflow is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# RNAflow is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details . # -# # -# You should have received a copy of the GNU General Public License # -# along with RNAflow (LICENSE). # -# If not, see <https://www.gnu.org/licenses/>. # -######################################################################### - - -rule star_mapping: - - input: - fastq = __star_mapping__input, - index = __star_mapping__index_done - log: - __star_mapping__logs - output: - __star_mapping__sort - params: - prefix1 = temp(__star_mapping__output_prefix1), - prefix2 = __star_mapping__output_prefix2, - index = __star_mapping__index, - ref = __star_mapping__ref, - #read_groups = __star_mapping__read_groups, - genome_dir = temp(__star_mapping__genome_dir), - splice_file = temp(__star_mapping__splice_file), - kwargs = config['star_mapping']['options'] - threads: - config['star_mapping']['threads'] - #Resources: - # ram = config['star_mapping']['ram'] - run: - shell(""" echo "Run rna-star 1st pass" - STAR --genomeDir {params.index} \ - --readFilesIn {input.fastq} \ - --runThreadN {threads} \ - --genomeLoad NoSharedMemory \ - --outSAMtype BAM SortedByCoordinate \ - --readFilesCommand zcat \ - --seedSearchStartLmax 20 \ - --outFileNamePrefix {params.prefix1} \ - {params.kwargs} 2> {log}""") - - shell(""" echo "run rna-star genome indexing" - if [ ! -d "{params.genome_dir}" ]; then - mkdir {params.genome_dir} - fi - STAR --runMode genomeGenerate \ - --genomeDir {params.genome_dir} \ - --genomeFastaFiles {params.ref} \ - --sjdbFileChrStartEnd {params.splice_file} \ - --sjdbOverhang 100 \ - --runThreadN {threads} 2>> {log}""") - - - shell(""" echo "Run rna-star 2nd pass" - STAR --genomeDir {params.genome_dir} \ - --readFilesIn {input.fastq} \ - --runThreadN {threads} \ - --genomeLoad NoSharedMemory \ - --sjdbFileChrStartEnd {params.splice_file} \ - --outSAMtype BAM SortedByCoordinate \ - --readFilesCommand zcat \ - --outFileNamePrefix {params.prefix2} \ - {params.kwargs} 2>> {log}""") - - - shell(""" samtools index {params.prefix2}Aligned.sortedByCoord.out.bam 2>> {log} """)