Skip to content
Snippets Groups Projects
Commit 4bbdebbf authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

add Kallisto and sortmeRNA

parent e4e4c1bc
No related branches found
No related tags found
No related merge requests found
...@@ -105,7 +105,6 @@ if config["adapters"]["remove"] : ...@@ -105,7 +105,6 @@ if config["adapters"]["remove"] :
adapters_output += ["01-Trimming/{SAMPLE}_R2_trim.fastq.gz"] adapters_output += ["01-Trimming/{SAMPLE}_R2_trim.fastq.gz"]
# Set parameters # Set parameters
adapters_adapt_list = config["adapters"]["adapter_list"]
adapters_options = config["adapters"]["options"] adapters_options = config["adapters"]["options"]
adapters_mode = config["adapters"]["mode"] adapters_mode = config["adapters"]["mode"]
adapters_min = config["adapters"]["m"] adapters_min = config["adapters"]["m"]
...@@ -123,7 +122,23 @@ if config["adapters"]["remove"] : ...@@ -123,7 +122,23 @@ if config["adapters"]["remove"] :
else: else:
adapters_output = input_data adapters_output = input_data
#----------------------------------
# bowtie2 MAPPING on Ribo only
#----------------------------------
if config["genome"]["rRNA_mapping"] :
sortmerna_input = adapters_output
sortmerna_fasta = config["genome"]["ribo_fasta_file"]
sortmerna_outfile_rRNA = "02-Mapping/Ribo/{SAMPLE}_rRNA.fastq.gz"
sortmerna_outfile_no_rRNA = "02-Mapping/Ribo/{SAMPLE}_norRNA.fastq.gz"
sortmerna_logs_err = "02-Mapping/Ribo/logs/{SAMPLE}_mapping.err"
sortmerna_logs_out = "02-Mapping/Ribo/logs/{SAMPLE}_mapping.out"
final_output.extend(expand(sortmerna_outfile_no_rRNA, SAMPLE=samples))
include: os.path.join(RULES, "sortmerna.rules")
#---------------------------------- #----------------------------------
# genome gestion # genome gestion
#---------------------------------- #----------------------------------
...@@ -132,24 +147,20 @@ ref = [ config["genome"]["name"] ] ...@@ -132,24 +147,20 @@ ref = [ config["genome"]["name"] ]
if config["genome"]["host_mapping"]: if config["genome"]["host_mapping"]:
ref += [ config["genome"]["host_name"] ] ref += [ config["genome"]["host_name"] ]
if config["genome"]["rRNA_mapping"]:
ref += [ "Ribo" ]
def mapping_index(wildcards): def mapping_index(wildcards):
if (wildcards.REF == config["genome"]["name"]): if (wildcards.REF == config["genome"]["name"]):
return {"fasta": config["genome"]["fasta_file"]} return {"fasta": config["genome"]["fasta_file"]}
elif (wildcards.REF == config["genome"]["host_name"]): elif (wildcards.REF == config["genome"]["host_name"]):
return {"fasta": config["genome"]["host_fasta_file"]} return {"fasta": config["genome"]["host_fasta_file"]}
elif (wildcards.REF == "Ribo" ):
return {"fasta": config["genome"]["ribo_fasta_file"]}
def mapping_prefix(wildcards): def mapping_prefix(wildcards):
if (wildcards.REF == config["genome"]["name"]): if (wildcards.REF == config["genome"]["name"]):
return {"prefix": config["genome"]["name"]} return {"prefix": config["genome"]["name"]}
elif (wildcards.REF == config["genome"]["host_name"]): elif (wildcards.REF == config["genome"]["host_name"]):
return {"prefix": config["genome"]["host_name"]} return {"prefix": config["genome"]["host_name"]}
elif (wildcards.REF == "Ribo" ):
return {"prefix": "ribo"}
def annot_index(wildcards): def annot_index(wildcards):
...@@ -157,8 +168,6 @@ def annot_index(wildcards): ...@@ -157,8 +168,6 @@ def annot_index(wildcards):
return {"gff_file": config["genome"]["gff_file"]} return {"gff_file": config["genome"]["gff_file"]}
elif (wildcards.REF == config["genome"]["host_name"]): elif (wildcards.REF == config["genome"]["host_name"]):
return {"gff_file": config["genome"]["host_gff_file"]} return {"gff_file": config["genome"]["host_gff_file"]}
elif (wildcards.REF == "Ribo" ):
return {"gff_file": ""}
#---------------------------------- #----------------------------------
...@@ -173,15 +182,8 @@ if config["bowtie2_mapping"]["do"]: ...@@ -173,15 +182,8 @@ if config["bowtie2_mapping"]["do"]:
# indexing for bowtie2 # indexing for bowtie2
bowtie2_index_fasta = unpack(mapping_index) bowtie2_index_fasta = unpack(mapping_index)
bowtie2_index_log = "02-Mapping/bowtie2/logs/bowtie2_{REF}_indexing.log" bowtie2_index_log = "02-Mapping/bowtie2/logs/bowtie2_{REF}_indexing.log"
if config["genome"]["host_mapping"] and config["genome"]["rRNA_mapping"] : bowtie2_index_output_done = os.path.join(config["genome"]["genome_directory"]+"{REF}/bowtie2/{REF}.1.bt2")
bowtie2_index_output_done = os.path.join(config["genome"]["genome_directory"],"{}/bowtie2/{{REF}}.1.bt2".format(config["genome"]["host_name"])) bowtie2_index_output_prefix = os.path.join(config["genome"]["genome_directory"]+"{REF}/bowtie2/{REF}")
bowtie2_index_output_prefix = os.path.join(config["genome"]["genome_directory"]+"{}/bowtie2/{{REF}}".format(config["genome"]["host_name"]))
elif not config["genome"]["host_mapping"] and config["genome"]["rRNA_mapping"] :
bowtie2_index_output_done = os.path.join(config["genome"]["genome_directory"]+"{}/bowtie2/{{REF}}.1.bt2".format(config["genome"]["name"]))
bowtie2_index_output_prefix = os.path.join(config["genome"]["genome_directory"]+"{}/bowtie2/{{REF}}".format(config["genome"]["name"]))
else:
bowtie2_index_output_done = os.path.join(config["genome"]["genome_directory"]+"{REF}/bowtie2/{REF}.1.bt2")
bowtie2_index_output_prefix = os.path.join(config["genome"]["genome_directory"]+"{REF}/bowtie2/{REF}")
include: os.path.join(RULES, "bowtie2_index.rules") include: os.path.join(RULES, "bowtie2_index.rules")
...@@ -210,16 +212,8 @@ if config["star_mapping"]["do"]: ...@@ -210,16 +212,8 @@ if config["star_mapping"]["do"]:
star_index_fasta = unpack(mapping_index) star_index_fasta = unpack(mapping_index)
star_mapping_splice_file = unpack(annot_index) star_mapping_splice_file = unpack(annot_index)
star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log" star_index_log = "02-Mapping/STAR/logs/STAR_{REF}_indexing.log"
star_index_output_done = config["genome"]["genome_directory"]+"{REF}/STAR/{REF}.1.bt2"
if config["genome"]["host_mapping"] and config["genome"]["rRNA_mapping"] : star_index_output_dir = config["genome"]["genome_directory"]+"{REF}/STAR/{REF}"
star_index_output_done = config["genome"]["genome_directory"]+"{}/STAR/{{REF}}.1.bt2".format(config["genome"]["host_name"])
star_index_output_dir = config["genome"]["genome_directory"]+"{}/STAR/{{REF}}".format(config["genome"]["host_name"])
elif not config["genome"]["host_mapping"] and config["genome"]["rRNA_mapping"] :
star_index_output_done = config["genome"]["genome_directory"]+"{}/STAR/{{REF}}.1.bt2".format(config["genome"]["name"])
star_index_output_dir = config["genome"]["genome_directory"]+"{}/STAR/{{REF}}".format(config["genome"]["name"])
else:
star_index_output_done = config["genome"]["genome_directory"]+"{REF}/STAR/{REF}.1.bt2"
star_index_output_dir = config["genome"]["genome_directory"]+"{REF}/STAR/{REF}"
include: os.path.join(RULES, "star_index.rules") include: os.path.join(RULES, "star_index.rules")
...@@ -250,9 +244,7 @@ if config["star_mapping"]["do"]: ...@@ -250,9 +244,7 @@ if config["star_mapping"]["do"]:
include: os.path.join(RULES, "star_mapping_pass2.rules") include: os.path.join(RULES, "star_mapping_pass2.rules")
#---------------------------------- #----------------------------------
# counting step # counting step
#---------------------------------- #----------------------------------
...@@ -285,7 +277,7 @@ if config["feature_counts"]["do"]: ...@@ -285,7 +277,7 @@ if config["feature_counts"]["do"]:
ref.pop() ref.pop()
feature_counts_input = counting_index feature_counts_input = counting_index
feature_counts_output_count = "03-Counting/{MAP}/{REF}/{SAMPLE}_{REF}_feature.out" feature_counts_output_count = "03-Counting/{REF}/{MAP}/{SAMPLE}_{REF}_feature.out"
feature_counts_gff = counting_gff feature_counts_gff = counting_gff
feature_counts_logs_err = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.err" feature_counts_logs_err = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.err"
feature_counts_logs_out = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.out" feature_counts_logs_out = "03-Counting/logs/{MAP}/{SAMPLE}_{REF}_feature.out"
...@@ -299,8 +291,8 @@ if config["feature_counts"]["do"]: ...@@ -299,8 +291,8 @@ if config["feature_counts"]["do"]:
#---------------------------------- #----------------------------------
flagstat_input = counting_index flagstat_input = counting_index
flagstat_logs = "02-Mapping/Flagstats/{MAP}/{REF}/logs/{SAMPLE}_{REF}.out" flagstat_logs = "02-Mapping/Flagstats/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
flagstat_output = "02-Mapping/Flagstats/{MAP}/{REF}/{SAMPLE}_{REF}_stats.out" flagstat_output = "02-Mapping/Flagstats/{REF}/{MAP}/{SAMPLE}_{REF}_stats.out"
final_output.extend(expand(flagstat_output, SAMPLE=samples, REF=ref, MAP=mapper)) final_output.extend(expand(flagstat_output, SAMPLE=samples, REF=ref, MAP=mapper))
include: os.path.join(RULES, "flagstat.rules") include: os.path.join(RULES, "flagstat.rules")
...@@ -310,13 +302,37 @@ include: os.path.join(RULES, "flagstat.rules") ...@@ -310,13 +302,37 @@ include: os.path.join(RULES, "flagstat.rules")
#---------------------------------- #----------------------------------
if config["bamCoverage"]["do"]: if config["bamCoverage"]["do"]:
bamCoverage_input = counting_index bamCoverage_input = counting_index
bamCoverage_logs = "04-Coverage/{MAP}/{REF}/logs/{SAMPLE}_{REF}.out" bamCoverage_logs = "04-Coverage/{REF}/{MAP}/logs/{SAMPLE}_{REF}.out"
bamCoverage_output = "04-Coverage/{MAP}/{REF}/{SAMPLE}_{REF}.bw" bamCoverage_output = "04-Coverage/{REF}/{MAP}/{SAMPLE}_{REF}.bw"
bamCoverage_options = config['bamCoverage']['options'] bamCoverage_options = config['bamCoverage']['options']
final_output.extend(expand(bamCoverage_output, SAMPLE=samples, REF=ref, MAP=mapper)) final_output.extend(expand(bamCoverage_output, SAMPLE=samples, REF=ref, MAP=mapper))
include: os.path.join(RULES, "bamCoverage.rules") include: os.path.join(RULES, "bamCoverage.rules")
#----------------------------------
# PseudoMapping with Kallisto
#----------------------------------
if config["pseudomapping"]["do"]:
kallisto_index_fasta = config["pseudomapping"]["fasta"]
kallisto_index_output = (os.path.splitext(config["pseudomapping"]["fasta"])[0])+".idx"
kallisto_index_kmer = config["pseudomapping"]["kmer"]
kallisto_index_log = "02-Mapping/STAR/logs/Kallisto_indexing.log"
include: os.path.join(RULES, "kallisto_index.rules")
#pseudo mapping
kallisto_quant_fastq = adapters_output
kallisto_quant_index = (os.path.splitext(config["pseudomapping"]["fasta"])[0])+".idx"
kallisto_quant_output_dir = "02-Mapping/Kallisto/{SAMPLE}"
kallisto_quant_options = config["pseudomapping"]["options"]
kallisto_quant_gtf = config["pseudomapping"]["gtf"]
kallisto_pseudo_log = "02-Mapping/Kallisto/logs/{SAMPLE}_pseudomap.log"
final_output.extend(expand(kallisto_quant_output_dir, SAMPLE=samples))
include: os.path.join(RULES, "kallisto_quant.rules")
#---------------------------------- #----------------------------------
# MultiQC report # MultiQC report
#---------------------------------- #----------------------------------
......
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
{ {
"ram" : "10G" "ram" : "10G"
}, },
"multiqc" :
{
"ram" : "20G"
},
"star_mapping_pass1" : "star_mapping_pass1" :
{ {
"ram" : "32G" "ram" : "32G"
......
...@@ -100,8 +100,8 @@ fastqc: ...@@ -100,8 +100,8 @@ fastqc:
adapters: adapters:
remove: yes remove: yes
tool: alienTrimmer # could be alienTrimmer tool: alienTrimmer # could be alienTrimmer or cutadapt
adapter_list: file:config/TruSeq_Stranded_RNA.fa cutadapt_file: file:config/TruSeq_Stranded_RNA.fa
alien_file: config/TruSeq_Stranded_RNA.fa alien_file: config/TruSeq_Stranded_RNA.fa
m: 25 m: 25
mode: a mode: a
...@@ -136,11 +136,32 @@ bowtie2_mapping: ...@@ -136,11 +136,32 @@ bowtie2_mapping:
star_mapping: star_mapping:
do: no do: yes
# for small genome, STAR may abort with segmentation fault. Setting sjdb Over hang to 250 skips this issue # for small genome, STAR may abort with segmentation fault. Setting sjdb Over hang to 250 skips this issue
options: "--outFilterMismatchNoverLmax 0.05 --outSAMunmapped Within --sjdbOverhang 250" options: "--outFilterMismatchNoverLmax 0.05 --outSAMunmapped Within --sjdbOverhang 250"
threads: 4 threads: 4
#===============================================================================
# pseudo mapping with kallisto
#
# :Parameters:
#
# - fasta: Fasta file for the kallisto index to be used for quantification
# - gtf: GTF file for transcriptome information (required for --genomebam)
# - kmer: k-mer (odd) length (default: 31, max value: 31)
# - options: any options recognised by bowtie2 tool
# - threads: number of threads to be used
#===============================================================================
pseudomapping:
do: yes
fasta: ../genome/hg38/hg38.fa
gtf: ../genome/hg38/hg38.gff
options: ""
kmer: 31
threads: 4
############################################################################# #############################################################################
# feature_counts used to count reads against features # feature_counts used to count reads against features
# #
......
...@@ -32,7 +32,7 @@ rule cutadapt: ...@@ -32,7 +32,7 @@ rule cutadapt:
params: params:
wkdir = adapters_wkdir, wkdir = adapters_wkdir,
options = adapters_options, options = adapters_options,
adapters = adapters_adapt_list, adapters = config['adapters']['cutadapt_file'],
mode = adapters_mode, mode = adapters_mode,
min = adapters_min, min = adapters_min,
qual = adapters_qual qual = adapters_qual
......
...@@ -35,7 +35,7 @@ rule kallisto_index: ...@@ -35,7 +35,7 @@ rule kallisto_index:
log: log:
kallisto_index_log kallisto_index_log
threads: threads:
config['kallisto']['threads'] config['pseudomapping']['threads']
envmodules: envmodules:
"kallisto/0.46.2" "kallisto/0.46.2"
shell: shell:
......
...@@ -30,18 +30,22 @@ rule kallisto_quant: ...@@ -30,18 +30,22 @@ rule kallisto_quant:
output: output:
directory(kallisto_quant_output_dir) directory(kallisto_quant_output_dir)
params: params:
options = kallisto_quant_options options = kallisto_quant_options,
gtf = kallisto_quant_gtf
singularity: singularity:
"rnaflow.img" "rnaflow.img"
log: log:
kallisto_pseudo_log kallisto_pseudo_log
threads: threads:
config['kallisto']['threads'] config['pseudomapping']['threads']
envmodules: envmodules:
"kallisto/0.46.2" "kallisto/0.46.2"
shell: shell:
""" """
if [[ -s {params.gtf} ]]
kallisto quant -t {threads} {params.options} -o {output} -i {input.index} {input.fastq} then
kallisto quant -t {threads} {params.options} -o {output} -i {input.index} {input.fastq} --gtf {params.gtf}
else
kallisto quant -t {threads} {params.options} -o {output} -i {input.index} {input.fastq}
fi
""" """
#########################################################################
# RNAflow: an automated pipeline to analyse transcriptomic data #
# #
# Authors: Rachel Legendre #
# Copyright (c) 2021-2022 Institut Pasteur (Paris). #
# #
# This file is part of RNAflow workflow. #
# #
# RNAflow is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# RNAflow is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details . #
# #
# You should have received a copy of the GNU General Public License #
# along with RNAflow (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
rule sortmerna:
input:
fastq = sortmerna_input,
fasta = sortmerna_fasta
output:
rRNA = sortmerna_outfile_rRNA,
no_rRNA = sortmerna_outfile_no_rRNA
singularity:
"rnaflow.img"
log:
err = sortmerna_logs_err,
out = sortmerna_logs_out
shadow: "shallow"
threads: 6
envmodules:
"sortmerna/2.1b"
shell:
"""
set +o pipefail
#tmp="{input.fastq}"
#infiles=($tmp)
fasta={input.fasta}
index=${{fasta%%.fa}}
if [[ ! -s ${{index}}.stats ]]
then
indexdb_rna --ref ${{fasta}},${{index}}
fi
sortmerna --ref $${{fasta}},${{index}} -a {threads} --reads {input.fastq} --aligned outfile_rRNA --fastx --sam --num_alignments 1 --other outfile_noRNA --log -v > {log.out} 2> {log.err}
pigz -fc outfile_rRNA > {output.rRNA}
pigz -fc outfile_noRNA {output.no_rRNA}
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment