diff --git a/Snakefile b/Snakefile
index 07bdd53c372eb3375f49569c482f295be4de58c8..b47b16332a090701d09d27d4a00a19468c815aaf 100755
--- a/Snakefile
+++ b/Snakefile
@@ -133,21 +133,20 @@ elif config["genome"]["rRNA_mapping"]:
 
 def mapping_index(wildcards):
     if (wildcards.REF == config["genome"]["name"]):
-        input = config["genome"]["fasta_file"]
+        return {"fasta": config["genome"]["fasta_file"]}
     elif (wildcards.REF == config["genome"]["host_name"]):
-        input = config["genome"]["host_fasta_file"]
+        return {"fasta": config["genome"]["host_fasta_file"]}
     elif (wildcards.REF == "Ribo" ):
-        input = config["genome"]["ribo_fasta_file"]
-    return(input)
+        return {"fasta": config["genome"]["ribo_fasta_file"]}
 
 def annot_index(wildcards):
     if (wildcards.REF == config["genome"]["name"]):
-        input = config["genome"]["gff_file"]
+        return {"gff_file": config["genome"]["gff_file"]}
     elif (wildcards.REF == config["genome"]["host_name"]):
-        input = config["genome"]["host_gff_file"]
+        return {"gff_file": config["genome"]["host_gff_file"]}
     elif (wildcards.REF == "Ribo" ):
-        input = ""
-    return(input)
+        return {"gff_file": ""}
+
 
 
 
@@ -161,7 +160,7 @@ if config["bowtie2_mapping"]["do"]:
     mapper += ["bowtie2"]
     if config["genome"]["index"]:
         # indexing for bowtie2
-        bowtie2_index_fasta = mapping_index
+        bowtie2_index_fasta = unpack(mapping_index)
         bowtie2_index_output_done = os.path.join(os.path.dirname(config["genome"]["fasta_file"]), "{REF}.1.bt2")
         bowtie2_index_output_prefix = os.path.join(config["genome"]["genome_directory"],"{REF}")
         bowtie2_index_log = "02-Mapping/bowtie2/logs/bowtie2_{REF}_indexing.log"
@@ -193,10 +192,10 @@ if config["bowtie2_mapping"]["do"]:
 if config["star_mapping"]["do"]:
     mapper += ["STAR"]
     if config["genome"]["index"]:
-        star_index_fasta = mapping_index
+        star_index_fasta = unpack(mapping_index)
         star_index_output_done = os.path.join(config["genome"]["genome_directory"],"{REF}/SAindex")
         star_index_output_dir = os.path.join(config["genome"]["genome_directory"],"{REF}")
-        star_mapping_splice_file = annot_index
+        star_mapping_splice_file = unpack(annot_index)
         star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
         include: os.path.join(RULES, "star_index.rules")
 
diff --git a/workflow/.gitkeep b/workflow/.gitkeep
old mode 100644
new mode 100755
diff --git a/workflow/rules/.gitkeep b/workflow/rules/.gitkeep
old mode 100644
new mode 100755
diff --git a/workflow/rules/adapters.rules b/workflow/rules/adapters.rules
new file mode 100755
index 0000000000000000000000000000000000000000..76f05ceab17f45b584effbea38073632919909fe
--- /dev/null
+++ b/workflow/rules/adapters.rules
@@ -0,0 +1,69 @@
+#########################################################################
+# RNAflow: an automated pipeline to analyse transcriptomic data         #
+#                                                                       #
+# Authors: Rachel Legendre                                              #
+# Copyright (c) 2021-2022 Institut Pasteur (Paris).                     #
+#                                                                       #
+# This file is part of RNAflow workflow.                                #
+#                                                                       #
+# RNAflow is free software: you can redistribute it and/or modify       #
+# it under the terms of the GNU General Public License as published by  #
+# the Free Software Foundation, either version 3 of the License, or     #
+# (at your option) any later version.                                   #
+#                                                                       #
+# RNAflow is distributed in the hope that it will be useful,            #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
+# GNU General Public License for more details .                         #
+#                                                                       #
+# You should have received a copy of the GNU General Public License     #
+# along with RNAflow (LICENSE).                                         #
+# If not, see <https://www.gnu.org/licenses/>.                          #
+#########################################################################
+
+
+
+
+rule adapters:
+    input:
+        fastq = adapters_input_fastq
+    output:
+        adapters_output
+    params:
+        wkdir = adapters_wkdir,
+        options = adapters_options,
+        adapters = adapters_adapt_list,
+        mode = adapters_mode,
+        min = adapters_min,
+        qual = adapters_qual
+    singularity:
+        "rnaflow.img"
+    threads: config['adapters']['threads']
+    log:
+        adapters_log
+    envmodules:
+        "cutadapt",
+        "cutadapt-devel"
+    shell:
+        """
+        set +o pipefail
+
+        tmp="{input}"
+        infiles=($tmp)
+
+        tmp="{output}"
+        outfiles=($tmp)
+
+        # add mode and adapter sequences
+        cmd+=" cutadapt -{params.mode} {params.adapters} -m {params.min} -q {params.qual} {params.options} "
+        # paired end or single end
+        if [[ ${{#infiles[@]}} -eq 2 ]];
+        then
+            cmd+=" -o ${{outfiles[0]}} -p ${{outfiles[1]}} ${{infiles[0]}} ${{infiles[1]}} "
+        else
+            cmd+=" -o ${{outfiles[0]}} ${{infiles[0]}}"
+        fi
+        #run command
+        eval "${{cmd}} > {log}"
+
+        """
diff --git a/workflow/rules/basicCoverage.rules b/workflow/rules/basicCoverage.rules
new file mode 100755
index 0000000000000000000000000000000000000000..6da03abc5d0728af68cbe31fadde599be90d62f4
--- /dev/null
+++ b/workflow/rules/basicCoverage.rules
@@ -0,0 +1,48 @@
+#########################################################################
+# RNAflow: an automated pipeline to analyse transcriptomic data         #
+#                                                                       #
+# Authors: Rachel Legendre                                              #
+# Copyright (c) 2021-2022 Institut Pasteur (Paris).                     #
+#                                                                       #
+# This file is part of RNAflow workflow.                                #
+#                                                                       #
+# RNAflow is free software: you can redistribute it and/or modify       #
+# it under the terms of the GNU General Public License as published by  #
+# the Free Software Foundation, either version 3 of the License, or     #
+# (at your option) any later version.                                   #
+#                                                                       #
+# RNAflow is distributed in the hope that it will be useful,            #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
+# GNU General Public License for more details .                         #
+#                                                                       #
+# You should have received a copy of the GNU General Public License     #
+# along with RNAflow (LICENSE).                                         #
+# If not, see <https://www.gnu.org/licenses/>.                          #
+#########################################################################
+
+
+
+rule basicCoverage:
+    input:
+        basicCoverage_input
+    log:
+        basicCoverage_logs
+    output:
+        basicCoverage_output
+    singularity:
+        "rnaflow.img"
+    params:
+        options = basicCoverage_options
+    envmodules:
+        "samtools",
+        "bedtools"
+    shell:
+        """
+
+        samtools view -f 0x10 -b {input} | bedtools genomecov -ibam stdin -bg > $rev
+        samtools view -F 0x10 -b {input} | bedtools genomecov -ibam stdin -bg > $fwd
+        bamCoverage --bam {input} --outFileName {output} {params.options} 2> {log}
+        """
+
+
diff --git a/workflow/rules/bowtie2_index.rules b/workflow/rules/bowtie2_index.rules
index d7c70561a336abc0201caf845087227947944562..ea8aa99c931531dbded81ea10a4849108bef2e77 100755
--- a/workflow/rules/bowtie2_index.rules
+++ b/workflow/rules/bowtie2_index.rules
@@ -26,7 +26,7 @@
 
 rule bowtie2_index:
     input:
-        fasta = bowtie2_index_fasta
+        bowtie2_index_fasta
     output:
         bowtie2_index_output_done
     singularity:
@@ -40,7 +40,10 @@ rule bowtie2_index:
         "samtools"
     shell:
         """
-        bowtie2-build {input.fasta} {params.prefix} 2> {log}
-        samtools faidx {input.fasta} 2>> {log}
+        set +o pipefail
+
+        #compute index
+        bowtie2-build {input} {params.prefix} 2> {log}
+        samtools faidx {input} 2>> {log}
         """
 
diff --git a/workflow/rules/kallisto_index.rules b/workflow/rules/kallisto_index.rules
old mode 100644
new mode 100755
diff --git a/workflow/rules/kallisto_quant.rules b/workflow/rules/kallisto_quant.rules
old mode 100644
new mode 100755
diff --git a/workflow/rules/star_index.rules b/workflow/rules/star_index.rules
old mode 100644
new mode 100755
index 8cfc47d3e44832bcb1bbf8d9022491171b34286f..aedc51efbbc7790d310973e415f7e5e31993b874
--- a/workflow/rules/star_index.rules
+++ b/workflow/rules/star_index.rules
@@ -25,12 +25,12 @@
 
 rule star_index:
     input:
-        fasta = star_index_fasta
+        star_index_fasta,
+        star_mapping_splice_file
     output:
         star_index_output_done
     params:
-        wkdir = star_index_output_dir,
-        splice_file = star_mapping_splice_file
+        wkdir = star_index_output_dir
     singularity:
         "rnaflow.img"
     log:
@@ -42,16 +42,19 @@ rule star_index:
         "samtools"
     shell:
         """
-        GenomeLength=`grep -v ">" {star_index_fasta} | tr -d '\n' | wc -c`
-        SAindex=$(echo $GenomeLength | awk '{{a=log($1)/2 - 1; if (a>14) a=14; printf("%d\n",a)}}')
+        set +o pipefail
+        # genome length is measured on the FASTA only, never on the annotation file
+        GenomeLength=`grep -v ">" {input.fasta} | tr -d '\n' | wc -c`
+        SAindex=$(echo $GenomeLength | awk '{{a=log($1)/2 - 1; if (a>14) a=14; printf("%d\\n",a)}}')
 
 
-        if [[ -s {params.splice_file} && {params.splice_file} == "*.gtf" ]]
+        # the glob must stay unquoted: inside [[ ]] a quoted pattern is compared literally
+        if [[ -s "{input.gff_file}" && "{input.gff_file}" == *.gtf ]]
         then
-            STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {params.splice_file} --genomeSAindexNbases $SAindex
-        elif [[ -s {params.splice_file} && {params.splice_file} == "*.gff" ]]
+            STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {input.gff_file} --genomeSAindexNbases $SAindex
+        elif [[ -s "{input.gff_file}" && "{input.gff_file}" == *.gff ]]
         then
-            STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {params.splice_file} --sjdbGTFtagExonParentTranscript Parent --genomeSAindexNbases $SAindex
+            STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --sjdbGTFfile {input.gff_file} --sjdbGTFtagExonParentTranscript Parent --genomeSAindexNbases $SAindex
         else
             STAR --runMode genomeGenerate --genomeFastaFiles {input.fasta} --genomeDir {params.wkdir} --runThreadN {threads} --genomeSAindexNbases $SAindex
         fi