Skip to content
Snippets Groups Projects
Commit 38125af0 authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion :recycle:
Browse files

add workflow for strainphlan on paired-end

parent bd2e900e
No related branches found
No related tags found
1 merge request!6update metaphlan3 and add strainphlan
# StrainPhlan
This describes a workflow to run [StrainPhlAn](https://github.com/biobakery/MetaPhlAn/wiki/StrainPhlAn-3.0).
In brief, it contains 4 steps:
* Run MetaPhlAn on all samples (paired-end)
* Merge the MetaPhlAn profiles of all samples
* Extract markers from the MetaPhlAn outputs
* Run StrainPhlAn on all selected clades
\ No newline at end of file
# Workflow configuration file; its content is what `config` refers to below.
configfile: "config.yaml"

# ==== Snakefile paths ====
# Location of each tool's Snakefile, read from the optional "snakefiles"
# section of the configuration. A missing section or entry gives None here
# instead of raising a KeyError.
__snakefiles = config.get("snakefiles", {})
__metaphlan3_rules = __snakefiles.get("metaphlan3")
__metaphlan2_merge_rules = __snakefiles.get("metaphlan2_merge")
__sample2markers_rules = __snakefiles.get("sample2markers")
__strainphlan_rules = __snakefiles.get("strainphlan")

# ==== Input / output locations ====
# "output_dir" is optional and falls back to "output"; "input_dir" is
# mandatory, so a configuration without it fails here with a KeyError.
__main_output_dir = config.get('output_dir', 'output')
__input_dir = config['input_dir']
# ---- Metaphlan3
# All templates below keep a literal "{sample}" placeholder: the braces are
# doubled so that str.format() leaves them in place. They are presumably
# consumed as a wildcard by the included Snakefile -- confirm there.
__metaphlan3_output_dir = __main_output_dir + "/metaphlan3"
__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq')

# Paired-end reads: <input_dir>/<sample>_1.fq.gz and <input_dir>/<sample>_2.fq.gz
__metaphlan3_input_r1 = "{dir}/{{sample}}_1.fq.gz".format(dir=__input_dir)
__metaphlan3_input_r2 = "{dir}/{{sample}}_2.fq.gz".format(dir=__input_dir)

# Per-sample outputs, all written under <output_dir>/metaphlan3
__metaphlan3_output_profile = "{dir}/{{sample}}.profile.tsv".format(
    dir=__metaphlan3_output_dir)
__metaphlan3_output_bowtie2out = "{dir}/{{sample}}.bowtie2.bz2".format(
    dir=__metaphlan3_output_dir)
__metaphlan3_output_sams = "{dir}/{{sample}}.sam.bz2".format(
    dir=__metaphlan3_output_dir)

include: __metaphlan3_rules
# ---- Metaphlan2 merge
__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge"

# Name of the merged table; optional in the configuration.
__metaphlan2_merge_output_file_name = config['metaphlan2_merge'].get(
    'output_file_name',
    "merged_taxonomic_profiles.txt")

# One Metaphlan3 profile per entry of config['samples'].
__metaphlan2_merge_input = expand(
    "{dir}/{{sample}}.profile.tsv".format(dir=__metaphlan3_output_dir),
    sample=config['samples'])

# <output_dir>/metaphlan_merge/<output_file_name>
__metaphlan2_merge_output = "{}/{}".format(
    __metaphlan2_merge_output_dir,
    __metaphlan2_merge_output_file_name)

include: __metaphlan2_merge_rules
# ---- Sample2markers ----
# Each sample gets its own sub-directory:
# <output_dir>/sample2markers/<sample>/<sample>.pkl
__sample2markers_main_output_dir = __main_output_dir + "/sample2markers"
__sample2markers_output_dir = "{dir}/{{sample}}".format(
    dir=__sample2markers_main_output_dir)
# The markers are extracted from the SAM files written by the Metaphlan3 step.
__sample2markers_input = __metaphlan3_output_sams
__sample2markers_output = "{dir}/{{sample}}.pkl".format(
    dir=__sample2markers_output_dir)

include: __sample2markers_rules

# ---- StrainPhlan ----
# Each clade gets its own sub-directory:
# <output_dir>/strainphlan/<clade>/RAxML_bestTree.<clade>.StrainPhlAn3.tre
__strainphlan_main_output_dir = __main_output_dir + "/strainphlan"
__strainphlan_clade = "{clade}"
__strainphlan_output_dir = "{dir}/{{clade}}".format(
    dir=__strainphlan_main_output_dir)
# Marker files of every configured sample (same template as the
# sample2markers output, expanded over config['samples']).
__strainphlan_input = expand(__sample2markers_output,
                             sample=config['samples'])
__strainphlan_output = "{dir}/{prefix}.{{clade}}.{ext}".format(
    dir=__strainphlan_output_dir,
    prefix="RAxML_bestTree",
    ext="StrainPhlAn3.tre")

include: __strainphlan_rules

# ---- Target rules ----
# Snakemake uses the first rule defined in this file (rules coming from
# `include:` do not count) as the default target. `all` is therefore defined
# before any other local rule, so that a bare `snakemake` call builds the
# whole workflow: one StrainPhlAn tree per clade listed in the configuration.
rule all:
    input:
        expand(__strainphlan_output,
               clade=config['strainphlan']['clades'])

# Intermediate target: stop after the merged taxonomic profile.
# Request it explicitly with `snakemake metaphlan_merge_all`.
rule metaphlan_merge_all:
    input:
        __metaphlan2_merge_output
# Paths of the per-tool Snakefiles included by the workflow.
snakefiles:
  metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile
  metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile
  sample2markers: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/sample2markers/Snakefile
  strainphlan: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/strainphlan/Snakefile

# Sample names; each must be unique. The workflow looks for
# <input_dir>/<sample>_1.fq.gz and <input_dir>/<sample>_2.fq.gz.
samples:
  - sample_1
  - sample_2
  - sample_3

# Directory holding the paired-end reads (mandatory).
input_dir: /a/path/to/input/data
# Root directory for all results (the workflow defaults to "output").
output_dir: output

# Per-tool settings. `threads` and `options` are not read by the main
# Snakefile; presumably they are consumed by the included Snakefiles.
metaphlan3:
  threads: 4
  input_type: fastq
  options: --bowtie2db /pasteur/zeus/projets/p02/metasig/DBs/metaphlan/metaphlan3bowtie2db/ --index mpa_v30_CHOCOPhlAn_201901

metaphlan2_merge:
  threads: 1

sample2markers:
  threads: 4

strainphlan:
  threads: 8
  options: ""
  # One tree is requested per clade listed here.
  clades:
    - s__Escherichia_coli
    - s__Klebsiella_pneumoniae
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment