Switch trimming include to cutadapt.rules; add AlienTrimmer and kallisto
rules; pass the STAR index directory through params

Summary of the changes carried by this patch:

  * Snakefile
      - file mode 100644 -> 100755
      - adapter_tool is set to "cutadapt" and the included rule file is
        cutadapt.rules instead of adapters.rules
      - star_mapping_pass1_done and star_mapping_pass2_done are both set
        to star_index_output_done
  * config/config.yaml
      - genome.genome_directory is commented out
  * workflow/rules/alienTrimmer.rules (new file)
      - rule "adapters" that assembles an AlienTrimmer command line for
        one input file (-i) or two input files (-1/-2) and runs it
  * workflow/rules/adapters.rules -> workflow/rules/cutadapt.rules
      - pure rename, content unchanged (similarity 100%)
  * workflow/rules/kallisto_index.rules, kallisto_quant.rules (new files)
      - rules wrapping "kallisto index" and "kallisto quant"
  * workflow/rules/star_mapping_pass1.rules, star_mapping_pass2.rules
      - file mode 100644 -> 100755
      - the rule input "index" is replaced by "done"; the index directory
        is now read from params.index for --genomeDir

Review notes (differences from the patch as originally submitted):

  * alienTrimmer.rules: the command string is now initialised with
    cmd="..." instead of cmd+="...", so a "cmd" variable inherited from
    the environment can no longer be prepended to the command that is
    passed to eval.
  * alienTrimmer.rules, kallisto_index.rules, kallisto_quant.rules: the
    shell commands now send both stdout and stderr to the declared log
    file; previously AlienTrimmer logged stdout only and the kallisto
    rules declared a log that was never written.
  * All edits replace existing added lines in place, so the hunk line
    counts are unchanged.

Open questions for the author (not changed here):

  * alienTrimmer.rules declares threads, params.wkdir and params.mode but
    the shell command uses none of them - intentional? TODO confirm.
  * The new rule in alienTrimmer.rules is named "adapters"; presumably
    cutadapt.rules defines a rule of the same name, so only one of the two
    files can be included at a time - verify.
  * kallisto_quant.rules reads kallisto_pseudo_log while every other
    variable of that rule is prefixed kallisto_quant_ - confirm the name
    that the Snakefile will define.
  * Indentation of context lines was restored conventionally (4-space
    steps); check it against the target files before applying.

diff --git a/Snakefile b/Snakefile
old mode 100644
new mode 100755
index 49ffd6737bd0750936cd9c9a833517d18cb5044d..07bdd53c372eb3375f49569c482f295be4de58c8
--- a/Snakefile
+++ b/Snakefile
@@ -99,7 +99,7 @@ include: os.path.join(RULES, "fastqc.rules")
 
 if config["adapters"]["remove"] :
     ## TODO add AlienTrimmer
-    adapter_tool = "adapters"
+    adapter_tool = "cutadapt"
     adapters_input_fastq = input_data
     adapters_wkdir = "01-Trimming"
     adapters_output = ["01-Trimming/{SAMPLE}_R1_trim.fastq.gz"]
@@ -114,7 +114,7 @@ if config["adapters"]["remove"] :
     adapters_qual = config["adapters"]["quality"]
     adapters_log = "01-Trimming/logs/{SAMPLE}_trim.txt"
     final_output.extend(expand(adapters_output, SAMPLE=samples))
-    include: os.path.join(RULES, "adapters.rules")
+    include: os.path.join(RULES, "cutadapt.rules")
 
 else:
     adapters_output = input_data
@@ -206,6 +206,7 @@ if config["star_mapping"]["do"]:
 
     #first pass mapping
     star_mapping_pass1_input = adapters_output
+    star_mapping_pass1_done = star_index_output_done
     star_mapping_pass1_index = star_index_output_dir
     star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out"
     star_mapping_pass1_output_prefix = "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_init_"
@@ -217,6 +218,7 @@ if config["star_mapping"]["do"]:
 
     #Second pass mapping
     star_mapping_pass2_input = adapters_output
+    star_mapping_pass2_done = star_index_output_done
     star_mapping_pass2_index = star_index_output_dir
     star_mapping_pass2_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}.out"
     star_mapping_pass2_output_prefix = "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_"
diff --git a/config/config.yaml b/config/config.yaml
index ed14f4850a30c8bd54e1a78bf696ac7bdba3b9bf..41dc78196b513b83c2b170043c6ee914fcba91e5 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -55,7 +55,7 @@ tmpdir: "/pasteur/sonic/scratch/public/"
 
 genome:
     index: true
-    genome_directory: /path/to/genome/
+    #genome_directory: /path/to/genome/
     name: saccer3
     fasta_file: /path/to/genome/saccer3.fa
     gff_file: /path/to/genome/saccer3.gff
diff --git a/workflow/rules/alienTrimmer.rules b/workflow/rules/alienTrimmer.rules
new file mode 100755
index 0000000000000000000000000000000000000000..ea142092494fb5cfc544e8c16b9ea9b08f99b4ff
--- /dev/null
+++ b/workflow/rules/alienTrimmer.rules
@@ -0,0 +1,68 @@
+#########################################################################
+# RNAflow: an automated pipeline to analyse transcriptomic data         #
+#                                                                       #
+# Authors: Rachel Legendre                                              #
+# Copyright (c) 2021-2022 Institut Pasteur (Paris).                     #
+#                                                                       #
+# This file is part of RNAflow workflow.                                #
+#                                                                       #
+# RNAflow is free software: you can redistribute it and/or modify       #
+# it under the terms of the GNU General Public License as published by  #
+# the Free Software Foundation, either version 3 of the License, or     #
+# (at your option) any later version.                                   #
+#                                                                       #
+# RNAflow is distributed in the hope that it will be useful,            #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
+# GNU General Public License for more details .                         #
+#                                                                       #
+# You should have received a copy of the GNU General Public License     #
+# along with RNAflow (LICENSE).                                         #
+# If not, see <https://www.gnu.org/licenses/>.                          #
+#########################################################################
+
+
+
+
+rule adapters:
+    input:
+        fastq = adapters_input_fastq
+    output:
+        adapters_output
+    params:
+        wkdir = adapters_wkdir,
+        options = adapters_options,
+        adapters = adapters_adapt_list,
+        mode = adapters_mode,
+        min = adapters_min,
+        qual = adapters_qual
+    singularity:
+        "rnaflow.img"
+    threads: config['adapters']['threads']
+    log:
+        adapters_log
+    envmodules:
+        "AlienTrimmer/2.0"
+    shell:
+        """
+        set +o pipefail
+
+        tmp="{input}"
+        infiles=($tmp)
+
+        tmp="{output}"
+        outfiles=($tmp)
+
+        # base command; assigned (not appended) so no inherited value of cmd leaks in
+        cmd="AlienTrimmer -c {params.adapters} -l {params.min} -q {params.qual} {params.options} -z"
+        # paired end or single end
+        if [[ ${{#infiles[@]}} -eq 2 ]];
+        then
+            cmd+=" -o ${{outfiles[0]}} -p ${{outfiles[1]}} -1 ${{infiles[0]}} -2 ${{infiles[1]}} "
+        else
+            cmd+=" -o ${{outfiles[0]}} -i ${{infiles[0]}}"
+        fi
+        # run command, keeping stdout and stderr in the rule log
+        eval "${{cmd}} > {log} 2>&1"
+
+        """
diff --git a/workflow/rules/adapters.rules b/workflow/rules/cutadapt.rules
similarity index 100%
rename from workflow/rules/adapters.rules
rename to workflow/rules/cutadapt.rules
diff --git a/workflow/rules/kallisto_index.rules b/workflow/rules/kallisto_index.rules
new file mode 100644
index 0000000000000000000000000000000000000000..879418d56953836e660239488e48c948a7e9d80c
--- /dev/null
+++ b/workflow/rules/kallisto_index.rules
@@ -0,0 +1,46 @@
+#########################################################################
+# RNAflow: an automated pipeline to analyse transcriptomic data         #
+#                                                                       #
+# Authors: Rachel Legendre                                              #
+# Copyright (c) 2021-2022 Institut Pasteur (Paris).                     #
+#                                                                       #
+# This file is part of RNAflow workflow.                                #
+#                                                                       #
+# RNAflow is free software: you can redistribute it and/or modify       #
+# it under the terms of the GNU General Public License as published by  #
+# the Free Software Foundation, either version 3 of the License, or     #
+# (at your option) any later version.                                   #
+#                                                                       #
+# RNAflow is distributed in the hope that it will be useful,            #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
+# GNU General Public License for more details .                         #
+#                                                                       #
+# You should have received a copy of the GNU General Public License     #
+# along with RNAflow (LICENSE).                                         #
+# If not, see <https://www.gnu.org/licenses/>.                          #
+#########################################################################
+
+
+
+rule kallisto_index:
+    input:
+        fasta = kallisto_index_fasta
+    output:
+        kallisto_index_output
+    params:
+        kmer = kallisto_index_kmer
+    singularity:
+        "rnaflow.img"
+    log:
+        kallisto_index_log
+    threads:
+        config['kallisto']['threads']
+    envmodules:
+        "kallisto/0.46.2"
+    shell:
+        """
+
+        kallisto index -i {output} --kmer-size={params.kmer} {input.fasta} > {log} 2>&1
+
+        """
diff --git a/workflow/rules/kallisto_quant.rules b/workflow/rules/kallisto_quant.rules
new file mode 100644
index 0000000000000000000000000000000000000000..41275412c0ae955a896ff27a153fb39ee47934da
--- /dev/null
+++ b/workflow/rules/kallisto_quant.rules
@@ -0,0 +1,47 @@
+#########################################################################
+# RNAflow: an automated pipeline to analyse transcriptomic data         #
+#                                                                       #
+# Authors: Rachel Legendre                                              #
+# Copyright (c) 2021-2022 Institut Pasteur (Paris).                     #
+#                                                                       #
+# This file is part of RNAflow workflow.                                #
+#                                                                       #
+# RNAflow is free software: you can redistribute it and/or modify       #
+# it under the terms of the GNU General Public License as published by  #
+# the Free Software Foundation, either version 3 of the License, or     #
+# (at your option) any later version.                                   #
+#                                                                       #
+# RNAflow is distributed in the hope that it will be useful,            #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
+# GNU General Public License for more details .                         #
+#                                                                       #
+# You should have received a copy of the GNU General Public License     #
+# along with RNAflow (LICENSE).                                         #
+# If not, see <https://www.gnu.org/licenses/>.                          #
+#########################################################################
+
+
+
+rule kallisto_quant:
+    input:
+        index = kallisto_quant_index,
+        fastq = kallisto_quant_fastq
+    output:
+        directory(kallisto_quant_output_dir)
+    params:
+        options = kallisto_quant_options
+    singularity:
+        "rnaflow.img"
+    log:
+        kallisto_pseudo_log
+    threads:
+        config['kallisto']['threads']
+    envmodules:
+        "kallisto/0.46.2"
+    shell:
+        """
+
+        kallisto quant -t {threads} {params.options} -o {output} -i {input.index} {input.fastq} > {log} 2>&1
+
+        """
diff --git a/workflow/rules/star_mapping_pass1.rules b/workflow/rules/star_mapping_pass1.rules
old mode 100644
new mode 100755
index 6e6b09fc7658909f8af722f1e58edea4b2f3a100..c1df88d488673dc96b8e8394b12e5046af22d5dc
--- a/workflow/rules/star_mapping_pass1.rules
+++ b/workflow/rules/star_mapping_pass1.rules
@@ -25,11 +25,12 @@
 rule star_mapping_pass1:
     input:
         fastq = star_mapping_pass1_input,
-        index = star_mapping_pass1_index
+        done = star_mapping_pass1_done
     output:
         jontion = temp(star_mapping_pass1_junctions),
         bam = temp(star_mapping_pass1_bam)
     params:
+        index = star_mapping_pass1_index,
         prefix = temp(star_mapping_pass1_output_prefix),
         #read_groups = star_mapping_pass1_read_groups,
         kwargs = config['star_mapping']['options']
@@ -44,7 +45,7 @@ rule star_mapping_pass1:
         star_mapping_pass1_logs
     shell:
         """
-        STAR --genomeDir {input.index} \
+        STAR --genomeDir {params.index} \
              --readFilesIn {input.fastq} \
              --runThreadN {threads} \
              --genomeLoad NoSharedMemory \
diff --git a/workflow/rules/star_mapping_pass2.rules b/workflow/rules/star_mapping_pass2.rules
old mode 100644
new mode 100755
index 1c4d677d3f5873084c5d7ec1e401715d8bc87ce3..a792f700945a73d6ff649f98ba4a4e4885884a20
--- a/workflow/rules/star_mapping_pass2.rules
+++ b/workflow/rules/star_mapping_pass2.rules
@@ -25,12 +25,13 @@
 rule star_mapping_pass2:
     input:
         fastq = star_mapping_pass2_input,
-        index = star_mapping_pass2_index,
+        done = star_mapping_pass2_done,
         sjdb = star_mapping_pass2_junctions,
         bam = star_mapping_pass2_bam
     output:
         star_mapping_pass2_sort
     params:
+        index = star_mapping_pass2_index,
         prefix = temp(star_mapping_pass2_output_prefix),
         RG = star_mapping_pass2_read_groups,
         kwargs = config['star_mapping']['options']
@@ -45,7 +46,7 @@ rule star_mapping_pass2:
         star_mapping_pass2_logs
     shell:
         """
-        STAR --genomeDir {input.index} \
+        STAR --genomeDir {params.index} \
              --readFilesIn {input.fastq} \
              --runThreadN {threads} \
              --genomeLoad NoSharedMemory \