diff --git a/workflows/strainphlan/README.md b/workflows/strainphlan/README.md
new file mode 100644
--- /dev/null
+++ b/workflows/strainphlan/README.md
@@ -0,0 +1,10 @@
+# StrainPhlAn
+
+This describes a workflow to run [StrainPhlAn](https://github.com/biobakery/MetaPhlAn/wiki/StrainPhlAn-3.0).
+
+In brief it contains 4 steps:
+
+* Run MetaPhlAn on all samples (paired-end reads)
+* Merge the MetaPhlAn profiles into a single table (optional, target `metaphlan_merge_all`)
+* Extract markers from the MetaPhlAn outputs
+* Run StrainPhlAn on all selected clades
diff --git a/workflows/strainphlan/Snakefile b/workflows/strainphlan/Snakefile
new file mode 100644
--- /dev/null
+++ b/workflows/strainphlan/Snakefile
@@ -0,0 +1,84 @@
+configfile: "config.yaml"
+
+# ==== Snakefile paths ====
+# Tool Snakefiles included below; their paths come from the "snakefiles" config section.
+__metaphlan3_rules = config.get("snakefiles", {}).get("metaphlan3")
+__metaphlan2_merge_rules = config.get("snakefiles", {}).get("metaphlan2_merge")
+__sample2markers_rules = config.get("snakefiles", {}).get("sample2markers")
+__strainphlan_rules = config.get("snakefiles", {}).get("strainphlan")
+
+__input_dir = config['input_dir']
+__main_output_dir = config.get('output_dir', 'output')
+
+# ---- Metaphlan3
+# Paired-end reads are looked up as <input_dir>/<sample>_1.fq.gz and <input_dir>/<sample>_2.fq.gz.
+__metaphlan3_output_dir = __main_output_dir + "/metaphlan3"
+__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq')
+__metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(
+    dir=__input_dir, sample="{sample}", ext="_1.fq.gz")
+__metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(
+    dir=__input_dir, sample="{sample}", ext="_2.fq.gz")
+__metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(
+    dir=__metaphlan3_output_dir, sample="{sample}")
+__metaphlan3_output_bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(
+    output_dir=__metaphlan3_output_dir, sample="{sample}")
+__metaphlan3_output_sams = "{output_dir}/{sample}.sam.bz2".format(
+    output_dir=__metaphlan3_output_dir, sample="{sample}")
+
+include: __metaphlan3_rules
+
+# ---- Metaphlan2 merge
+__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge"
+__metaphlan2_merge_output_file_name = config['metaphlan2_merge'].get(
+    'output_file_name', "merged_taxonomic_profiles.txt")
+__metaphlan2_merge_input = expand(
+    "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, sample="{sample}"),
+    sample=config['samples'])
+__metaphlan2_merge_output = "{dir}/{file_name}".format(
+    dir=__metaphlan2_merge_output_dir, file_name=__metaphlan2_merge_output_file_name)
+
+include: __metaphlan2_merge_rules
+
+# ---- Sample2markers ----
+__sample2markers_main_output_dir = __main_output_dir + "/sample2markers"
+# One sub-directory per sample; "{sample}" is kept literal so it stays a Snakemake wildcard.
+__sample2markers_output_dir = "{dir}/{sample}".format(
+    dir=__sample2markers_main_output_dir, sample="{sample}")
+__sample2markers_input = __metaphlan3_output_sams
+__sample2markers_output = "{dir}/{sample}.pkl".format(
+    dir=__sample2markers_output_dir, sample="{sample}")
+
+include: __sample2markers_rules
+
+
+# ---- StrainPhlan ----
+
+__strainphlan_main_output_dir = __main_output_dir + "/strainphlan"
+__strainphlan_clade = "{clade}"
+__strainphlan_output_dir = "{dir}/{clade}".format(
+    dir=__strainphlan_main_output_dir, clade="{clade}")
+
+__strainphlan_input = expand(
+    "{dir}/{sample}.pkl".format(dir=__sample2markers_output_dir, sample="{sample}"),
+    sample=config['samples'])
+__strainphlan_output = "{dir}/{prefix}.{clade}.{ext}".format(
+    dir=__strainphlan_output_dir,
+    prefix="RAxML_bestTree",
+    clade="{clade}",
+    ext="StrainPhlAn3.tre")
+
+include: __strainphlan_rules
+
+
+# ---- Targets ----
+# `all` must stay the first rule declared in this file: Snakemake uses the
+# first rule of the main Snakefile as the default target, and rules pulled in
+# through `include:` do not count.
+rule all:
+    input:
+        expand(__strainphlan_output, clade=config['strainphlan']['clades'])
+
+# Optional target: merged taxonomic profile table of all samples.
+rule metaphlan_merge_all:
+    input:
+        __metaphlan2_merge_output
diff --git a/workflows/strainphlan/config.yaml b/workflows/strainphlan/config.yaml
new file mode 100644
--- /dev/null
+++ b/workflows/strainphlan/config.yaml
@@ -0,0 +1,32 @@
+snakefiles:
+  metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile
+  metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile
+  sample2markers: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/sample2markers/Snakefile
+  strainphlan: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/strainphlan/Snakefile
+
+# Sample names; reads are taken from <input_dir>/<sample>_1.fq.gz and <input_dir>/<sample>_2.fq.gz
+samples:
+  - sample_1
+  - sample_2
+  - sample_3
+
+input_dir: /a/path/to/input/data
+output_dir: output
+
+metaphlan3:
+  threads: 4
+  input_type: fastq
+  options: --bowtie2db /pasteur/zeus/projets/p02/metasig/DBs/metaphlan/metaphlan3bowtie2db/ --index mpa_v30_CHOCOPhlAn_201901
+
+metaphlan2_merge:
+  threads: 1
+
+sample2markers:
+  threads: 4
+
+strainphlan:
+  threads: 8
+  options: ""
+  clades:
+    - s__Escherichia_coli
+    - s__Klebsiella_pneumoniae