From d72267098d4c3ea8ba92c14e5f2a64877beb3c68 Mon Sep 17 00:00:00 2001 From: rlegendr <rachel.legendre@pasteur.fr> Date: Tue, 24 Aug 2021 14:44:07 +0200 Subject: [PATCH] fix issue #32 --- Snakefile | 6 +- config/config.yaml | 2 +- workflow/rules/alienTrimmer.rules | 68 +++++++++++++++++++ .../rules/{adapters.rules => cutadapt.rules} | 0 workflow/rules/kallisto_index.rules | 46 +++++++++++++ workflow/rules/kallisto_quant.rules | 47 +++++++++++++ workflow/rules/star_mapping_pass1.rules | 5 +- workflow/rules/star_mapping_pass2.rules | 5 +- 8 files changed, 172 insertions(+), 7 deletions(-) mode change 100644 => 100755 Snakefile create mode 100755 workflow/rules/alienTrimmer.rules rename workflow/rules/{adapters.rules => cutadapt.rules} (100%) create mode 100644 workflow/rules/kallisto_index.rules create mode 100644 workflow/rules/kallisto_quant.rules mode change 100644 => 100755 workflow/rules/star_mapping_pass1.rules mode change 100644 => 100755 workflow/rules/star_mapping_pass2.rules diff --git a/Snakefile b/Snakefile old mode 100644 new mode 100755 index 49ffd67..07bdd53 --- a/Snakefile +++ b/Snakefile @@ -99,7 +99,7 @@ include: os.path.join(RULES, "fastqc.rules") if config["adapters"]["remove"] : ## TODO add AlienTrimmer - adapter_tool = "adapters" + adapter_tool = "cutadapt" adapters_input_fastq = input_data adapters_wkdir = "01-Trimming" adapters_output = ["01-Trimming/{SAMPLE}_R1_trim.fastq.gz"] @@ -114,7 +114,7 @@ if config["adapters"]["remove"] : adapters_qual = config["adapters"]["quality"] adapters_log = "01-Trimming/logs/{SAMPLE}_trim.txt" final_output.extend(expand(adapters_output, SAMPLE=samples)) - include: os.path.join(RULES, "adapters.rules") + include: os.path.join(RULES, "cutadapt.rules") else: adapters_output = input_data @@ -206,6 +206,7 @@ if config["star_mapping"]["do"]: #first pass mapping star_mapping_pass1_input = adapters_output + star_mapping_pass1_done = star_index_output_done star_mapping_pass1_index = star_index_output_dir 
star_mapping_pass1_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}_init.out" star_mapping_pass1_output_prefix = "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_init_" @@ -217,6 +218,7 @@ if config["star_mapping"]["do"]: #Second pass mapping star_mapping_pass2_input = adapters_output + star_mapping_pass2_done = star_index_output_done star_mapping_pass2_index = star_index_output_dir star_mapping_pass2_logs = "02-Mapping/STAR/logs/{SAMPLE}_{REF}.out" star_mapping_pass2_output_prefix = "02-Mapping/STAR/{REF}/{SAMPLE}_{REF}_" diff --git a/config/config.yaml b/config/config.yaml index ed14f48..41dc781 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -55,7 +55,7 @@ tmpdir: "/pasteur/sonic/scratch/public/" genome: index: true - genome_directory: /path/to/genome/ + #genome_directory: /path/to/genome/ name: saccer3 fasta_file: /path/to/genome/saccer3.fa gff_file: /path/to/genome/saccer3.gff diff --git a/workflow/rules/alienTrimmer.rules b/workflow/rules/alienTrimmer.rules new file mode 100755 index 0000000..ea14209 --- /dev/null +++ b/workflow/rules/alienTrimmer.rules @@ -0,0 +1,68 @@ +######################################################################### +# RNAflow: an automated pipeline to analyse transcriptomic data # +# # +# Authors: Rachel Legendre # +# Copyright (c) 2021-2022 Institut Pasteur (Paris). # +# # +# This file is part of RNAflow workflow. # +# # +# RNAflow is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# RNAflow is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with RNAflow (LICENSE). 
# +# If not, see <https://www.gnu.org/licenses/>. # +#########################################################################
+
+# Adapter trimming with AlienTrimmer (single- or paired-end FASTQ input).
+# The adapters_* variables are expected to be set by the including Snakefile.
+# AlienTrimmer's stdout and stderr are both written to the rule log.
+rule adapters:
+    input:
+        fastq = adapters_input_fastq
+    output:
+        adapters_output
+    params:
+        wkdir = adapters_wkdir,
+        options = adapters_options,
+        adapters = adapters_adapt_list,
+        mode = adapters_mode,
+        min = adapters_min,
+        qual = adapters_qual
+    singularity:
+        "rnaflow.img"
+    threads: config['adapters']['threads']
+    log:
+        adapters_log
+    envmodules:
+        "AlienTrimmer/2.0"
+    shell:
+        """
+        set +o pipefail
+
+        tmp="{input}"
+        infiles=($tmp)
+
+        tmp="{output}"
+        outfiles=($tmp)
+
+        # base command; plain assignment so an inherited $cmd is never prepended
+        cmd="AlienTrimmer -c {params.adapters} -l {params.min} -q {params.qual} {params.options} -z"
+        # two input files means paired-end, otherwise single-end
+        if [[ ${{#infiles[@]}} -eq 2 ]];
+        then
+            cmd+=" -o ${{outfiles[0]}} -p ${{outfiles[1]}} -1 ${{infiles[0]}} -2 ${{infiles[1]}} "
+        else
+            cmd+=" -o ${{outfiles[0]}} -i ${{infiles[0]}}"
+        fi
+        # run the command, capturing stdout and stderr in the log
+        eval "${{cmd}} > {log} 2>&1"
+
+        """
diff --git a/workflow/rules/adapters.rules b/workflow/rules/cutadapt.rules similarity index 100% rename from workflow/rules/adapters.rules rename to workflow/rules/cutadapt.rules diff --git a/workflow/rules/kallisto_index.rules b/workflow/rules/kallisto_index.rules new file mode 100644 index 0000000..879418d --- /dev/null +++ b/workflow/rules/kallisto_index.rules @@ -0,0 +1,46 @@ +######################################################################### +# RNAflow: an automated pipeline to analyse transcriptomic data # +# # +# Authors: Rachel Legendre # +# Copyright (c) 2021-2022 Institut Pasteur (Paris). # +# # +# This file is part of RNAflow workflow. # +# # +# RNAflow is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. 
# +# # +# RNAflow is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with RNAflow (LICENSE). # +# If not, see <https://www.gnu.org/licenses/>. # +#########################################################################
+
+# Build a kallisto index from a FASTA file using the configured k-mer size;
+# kallisto's console output is redirected to the rule log.
+rule kallisto_index:
+    input:
+        fasta = kallisto_index_fasta
+    output:
+        kallisto_index_output
+    params:
+        kmer = kallisto_index_kmer
+    singularity:
+        "rnaflow.img"
+    log:
+        kallisto_index_log
+    threads:
+        config['kallisto']['threads']
+    envmodules:
+        "kallisto/0.46.2"
+    shell:
+        """
+
+        kallisto index -i {output} --kmer-size={params.kmer} {input.fasta} > {log} 2>&1
+
+        """
diff --git a/workflow/rules/kallisto_quant.rules b/workflow/rules/kallisto_quant.rules new file mode 100644 index 0000000..4127541 --- /dev/null +++ b/workflow/rules/kallisto_quant.rules @@ -0,0 +1,47 @@ +######################################################################### +# RNAflow: an automated pipeline to analyse transcriptomic data # +# # +# Authors: Rachel Legendre # +# Copyright (c) 2021-2022 Institut Pasteur (Paris). # +# # +# This file is part of RNAflow workflow. # +# # +# RNAflow is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# RNAflow is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with RNAflow (LICENSE). 
# +# If not, see <https://www.gnu.org/licenses/>. # +#########################################################################
+
+# Run `kallisto quant` for the given FASTQ file(s) against a prebuilt index;
+# kallisto's console output is redirected to the rule log.
+rule kallisto_quant:
+    input:
+        index = kallisto_quant_index,
+        fastq = kallisto_quant_fastq
+    output:
+        directory(kallisto_quant_output_dir)
+    params:
+        options = kallisto_quant_options
+    singularity:
+        "rnaflow.img"
+    log:
+        kallisto_pseudo_log
+    threads:
+        config['kallisto']['threads']
+    envmodules:
+        "kallisto/0.46.2"
+    shell:
+        """
+
+        kallisto quant -t {threads} {params.options} -o {output} -i {input.index} {input.fastq} > {log} 2>&1
+
+        """
diff --git a/workflow/rules/star_mapping_pass1.rules b/workflow/rules/star_mapping_pass1.rules old mode 100644 new mode 100755 index 6e6b09f..c1df88d --- a/workflow/rules/star_mapping_pass1.rules +++ b/workflow/rules/star_mapping_pass1.rules @@ -25,11 +25,12 @@ rule star_mapping_pass1: input: fastq = star_mapping_pass1_input, - index = star_mapping_pass1_index + done = star_mapping_pass1_done output: jontion = temp(star_mapping_pass1_junctions), bam = temp(star_mapping_pass1_bam) params: + index = star_mapping_pass1_index, prefix = temp(star_mapping_pass1_output_prefix), #read_groups = star_mapping_pass1_read_groups, kwargs = config['star_mapping']['options'] @@ -44,7 +45,7 @@ rule star_mapping_pass1: star_mapping_pass1_logs shell: """ - STAR --genomeDir {input.index} \ + STAR --genomeDir {params.index} \ --readFilesIn {input.fastq} \ --runThreadN {threads} \ --genomeLoad NoSharedMemory \ diff --git a/workflow/rules/star_mapping_pass2.rules b/workflow/rules/star_mapping_pass2.rules old mode 100644 new mode 100755 index 1c4d677..a792f70 --- a/workflow/rules/star_mapping_pass2.rules +++ b/workflow/rules/star_mapping_pass2.rules @@ -25,12 +25,13 @@ rule star_mapping_pass2: input: fastq = star_mapping_pass2_input, - index = star_mapping_pass2_index, + done = star_mapping_pass2_done, sjdb = star_mapping_pass2_junctions, bam = star_mapping_pass2_bam output: star_mapping_pass2_sort params: + index = 
star_mapping_pass2_index, prefix = temp(star_mapping_pass2_output_prefix), RG = star_mapping_pass2_read_groups, kwargs = config['star_mapping']['options'] @@ -45,7 +46,7 @@ rule star_mapping_pass2: star_mapping_pass2_logs shell: """ - STAR --genomeDir {input.index} \ + STAR --genomeDir {params.index} \ --readFilesIn {input.fastq} \ --runThreadN {threads} \ --genomeLoad NoSharedMemory \ -- GitLab