From 17767e090ee3e3267d5286a811a987522661bc8c Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Mon, 14 Dec 2020 12:28:05 +0100 Subject: [PATCH 1/5] update metaphlan3 pipeline --- tools/metaphlan3/metaphlan/paired/Snakefile | 2 +- workflows/metaphlan3/README.md | 8 +++ .../metaphlan3/paired_metaphlan2/Snakefile | 56 +++++++++++++++++++ .../metaphlan3/paired_metaphlan2/config.yaml | 43 ++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 workflows/metaphlan3/README.md create mode 100644 workflows/metaphlan3/paired_metaphlan2/Snakefile create mode 100644 workflows/metaphlan3/paired_metaphlan2/config.yaml diff --git a/tools/metaphlan3/metaphlan/paired/Snakefile b/tools/metaphlan3/metaphlan/paired/Snakefile index 5517590..87f4be7 100644 --- a/tools/metaphlan3/metaphlan/paired/Snakefile +++ b/tools/metaphlan3/metaphlan/paired/Snakefile @@ -18,7 +18,7 @@ rule metaphlan3_paired: r1 = __metaphlan3_input_r1, r2 = __metaphlan3_input_r2 output: - profile = __metaphlan3_output, + profile = __metaphlan3_output_profile, bowtie2out = __metaphlan3_output_bowtie2out, sams = __metaphlan3_output_sams params: diff --git a/workflows/metaphlan3/README.md b/workflows/metaphlan3/README.md new file mode 100644 index 0000000..d1dc622 --- /dev/null +++ b/workflows/metaphlan3/README.md @@ -0,0 +1,8 @@ +# Simple metaphlan3 workflows + +Workflows using metaphlan3 and simple visualization of the results. + +All examples presented were made for our TARS cluster system. This means you are likely to find some +absolute paths in the `config.yaml` that you might not have access to. + +For every workflow, an example is provided and is based on the `config.yaml` file. Singularity images are necessary for these examples. 
diff --git a/workflows/metaphlan3/paired_metaphlan2/Snakefile b/workflows/metaphlan3/paired_metaphlan2/Snakefile new file mode 100644 index 0000000..914d5c8 --- /dev/null +++ b/workflows/metaphlan3/paired_metaphlan2/Snakefile @@ -0,0 +1,56 @@ +configfile: "config.yaml" + +# ==== Snakefile paths ==== +__metaphlan3_rules = config.get("snakefiles", {}).get("metaphlan3", "../../tools/metaphlan2/metaphlan2/Snakefile") +__metaphlan2_merge_rules = config.get("snakefiles", {}).get("metaphlan2_merge", "../../tools/metaphlan2/metaphlan2_merge/Snakefile") +__metaphlan2_heatmap_rules = config.get("snakefiles", {}).get("metaphlan2_heatmap", "../../tools/metaphlan2/metaphlan2_heatmap/Snakefile") +__graphlan_from_metaphlan2_rules = config.get("snakefiles", {}).get("graphlan_from_metaphlan2", "../subworkflows/graphlan_from_metaphlan2/Snakefile") + +__input_dir = config['input_dir'] +__main_output_dir = config.get('output_dir', 'output') + +# ---- Metaphlan3 +__metaphlan3_output_dir = __main_output_dir + "/metaphlan3" +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_1.fq.gz") +__metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_2.fq.gz") +__metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}") +__metaphlan3_output_bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") +__metaphlan3_output_sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") + +include: __metaphlan3_rules + +# ---- Metaphlan2 merge +__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge" +__metaphlan2_merge_output_file_name = config['metaphlan2_merge'].get('output_file_name',"merged_taxonomic_profiles.txt") +__metaphlan2_merge_input = __metaphlan3_output_profile 
+__metaphlan2_merge_output = "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) +include: __metaphlan2_merge_rules + +# ---- Metaphlan2 heatmap +__metaphlan2_heatmap_output_dir = __main_output_dir + "/metaphlan2_heatmap" +__metaphlan2_heatmap_output_file_name = config['metaphlan2_heatmap'].get('output_name',"heatmap.png") +__metaphlan2_heatmap_input = __metaphlan2_merge_output +__metaphlan2_heatmap_output = "{dir}/{file_name}".format(dir=__metaphlan2_heatmap_output_dir, + file_name=__metaphlan2_heatmap_output_file_name) +include: __metaphlan2_heatmap_rules + +# ---- Graphlan Dendogram +__graphlan_from_metaphlan2_output_dir = __main_output_dir + "/graphlan" +__graphlan_from_metaphlan2_output_file_name = config.get("graphlan_from_metaphlan2", {}).get('output_name',"dendrogram.png") +__graphlan_from_metaphlan2_input = __metaphlan2_merge_output +__graphlan_from_metaphlan2_output = "{dir}/{file_name}".format(dir=__graphlan_from_metaphlan2_output_dir, + file_name=__graphlan_from_metaphlan2_output_file_name) +include: __graphlan_from_metaphlan2_rules + +rule all: + input: + heatmap = __metaphlan2_heatmap_output, + dendogram = __graphlan_from_metaphlan2_output + diff --git a/workflows/metaphlan3/paired_metaphlan2/config.yaml b/workflows/metaphlan3/paired_metaphlan2/config.yaml new file mode 100644 index 0000000..3f6af03 --- /dev/null +++ b/workflows/metaphlan3/paired_metaphlan2/config.yaml @@ -0,0 +1,43 @@ +snakefiles: + metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + +samples: + - sample_1 + - 
sample_2 + - sample_2 + +input_dir: /a/path/to/input/data +output_dir: metaphlan2_output + +metaphlan3: + modules: singularity + threads: 4 + input_type: fastq + options: --bowtie2db /pasteur/gaia/projets/p01/Atm/DBs/bowtie2/metaphlan2/ + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.7.7_s3.2.1.simg metaphlan2.py + +metaphlan2_merge: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.7.7_s3.2.1.simg merge_metaphlan_tables.py + +metaphlan2_heatmap: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.6.0_s3.2.1.simg metaphlan_hclust_heatmap.py + output_name: snakemake_heatmap.png + +export2graphlan: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg export2graphlan.py + options: "--skip_rows 1,2 --most_abundant 100 --abundance_threshold 1 --least_biomarkers 10 --annotations 5,6 --external_annotations 7 --min_clade_size 1" + +graphlan_annotate: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg graphlan_annotate.py + +graphlan: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg graphlan.py + options: "--dpi 300 --external_legends" -- GitLab From 8a7ec2e8a4101f8b88d088dcc2bd4903829ae944 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Mon, 14 Dec 2020 14:48:22 +0100 Subject: [PATCH 2/5] update paths from tars to maestro --- tools/eggnogmapper2/annotate/example_usage/config.yaml | 2 +- 
tools/eggnogmapper2/diamond/example_usage/config.yaml | 2 +- tools/utils/cat/example_usage/config.yaml | 2 +- tools/utils/split_fasta/example_usage/config.yaml | 2 +- workflows/eggnogmapperv2/config.yaml | 8 ++++---- workflows/metaphlan2/paired_metaphlan2/config.yaml | 8 ++++---- workflows/metaphlan2/single_metaphlan2/config.yaml | 8 ++++---- workflows/metaphlan3/paired_metaphlan2/Snakefile | 8 ++++---- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/eggnogmapper2/annotate/example_usage/config.yaml b/tools/eggnogmapper2/annotate/example_usage/config.yaml index c7db75d..8e808da 100644 --- a/tools/eggnogmapper2/annotate/example_usage/config.yaml +++ b/tools/eggnogmapper2/annotate/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - eggnogmapper2_annotate: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/annotate/Snakefile + eggnogmapper2_annotate: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/annotate/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/eggnogmapper2/diamond/ output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/eggnogmapper2/diamond/example_usage/config.yaml b/tools/eggnogmapper2/diamond/example_usage/config.yaml index 5fe001f..1db9102 100644 --- a/tools/eggnogmapper2/diamond/example_usage/config.yaml +++ b/tools/eggnogmapper2/diamond/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - eggnogmapper2_diamond: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/diamond/Snakefile + eggnogmapper2_diamond: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/diamond/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/split_fasta output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/utils/cat/example_usage/config.yaml 
b/tools/utils/cat/example_usage/config.yaml index b0d0444..fc1b43b 100644 --- a/tools/utils/cat/example_usage/config.yaml +++ b/tools/utils/cat/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - cat: /pasteur/projets/policy01/Atm/snakemake/tools/utils/cat/Snakefile + cat: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/cat/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/eggnogmapper2/diamond/ output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/utils/split_fasta/example_usage/config.yaml b/tools/utils/split_fasta/example_usage/config.yaml index 559a789..6b47f03 100644 --- a/tools/utils/split_fasta/example_usage/config.yaml +++ b/tools/utils/split_fasta/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - split_fasta: /pasteur/projets/policy01/Atm/snakemake/tools/utils/split_fasta/Snakefile + split_fasta: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/split_fasta/Snakefile input_fasta: /pasteur/projets/policy01/DBs/IGC/2014-9.9M/IGC.fa output_dir: /pasteur/projets/policy01/sandbox/20200210_test_snakemake/output diff --git a/workflows/eggnogmapperv2/config.yaml b/workflows/eggnogmapperv2/config.yaml index 10bd4b1..4e3f44d 100644 --- a/workflows/eggnogmapperv2/config.yaml +++ b/workflows/eggnogmapperv2/config.yaml @@ -1,8 +1,8 @@ snakefiles: - eggnogmapper2_diamond: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/diamond/Snakefile - split_fasta: /pasteur/projets/policy01/Atm/snakemake/tools/utils/split_fasta/Snakefile - cat: /pasteur/projets/policy01/Atm/snakemake/tools/utils/cat/Snakefile - eggnogmapper2_annotate: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/annotate/Snakefile + eggnogmapper2_diamond: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/diamond/Snakefile + split_fasta: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/split_fasta/Snakefile + cat: 
/pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/cat/Snakefile + eggnogmapper2_annotate: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/annotate/Snakefile input_fasta: /pasteur/homes/kehillio/Atm/kenzo/sandbox/20200210_test_snakemake/test.fa output_dir: /pasteur/homes/kehillio/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/workflows/metaphlan2/paired_metaphlan2/config.yaml b/workflows/metaphlan2/paired_metaphlan2/config.yaml index 21b4cbd..077607a 100644 --- a/workflows/metaphlan2/paired_metaphlan2/config.yaml +++ b/workflows/metaphlan2/paired_metaphlan2/config.yaml @@ -1,8 +1,8 @@ snakefiles: - metaphlan2: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2/paired/Snakefile - metaphlan2_merge: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile - metaphlan2_heatmap: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile - graphlan_from_metaphlan2: /pasteur/projets/policy01/Atm/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile samples: - sample_1 diff --git a/workflows/metaphlan2/single_metaphlan2/config.yaml b/workflows/metaphlan2/single_metaphlan2/config.yaml index 6d7b5bf..3d5dccc 100644 --- a/workflows/metaphlan2/single_metaphlan2/config.yaml +++ b/workflows/metaphlan2/single_metaphlan2/config.yaml @@ -1,8 +1,8 @@ snakefiles: - metaphlan2: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2/single/Snakefile - metaphlan2_merge: 
/pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile - metaphlan2_heatmap: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile - graphlan_from_metaphlan2: /pasteur/projets/policy01/Atm/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2/single/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile samples: - sample_1 diff --git a/workflows/metaphlan3/paired_metaphlan2/Snakefile b/workflows/metaphlan3/paired_metaphlan2/Snakefile index 914d5c8..eecd4a4 100644 --- a/workflows/metaphlan3/paired_metaphlan2/Snakefile +++ b/workflows/metaphlan3/paired_metaphlan2/Snakefile @@ -13,11 +13,11 @@ __main_output_dir = config.get('output_dir', 'output') __metaphlan3_output_dir = __main_output_dir + "/metaphlan3" __metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') __metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(dir=__input_dir, - sample="{sample}", - ext="_1.fq.gz") + sample="{sample}", + ext="_1.fq.gz") __metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(dir=__input_dir, - sample="{sample}", - ext="_2.fq.gz") + sample="{sample}", + ext="_2.fq.gz") __metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, sample="{sample}") __metaphlan3_output_bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") -- GitLab From 6e60eb1823fa13d28119a5313e0de92fae839683 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Mon, 14 Dec 2020 15:08:14 +0100 Subject: [PATCH 3/5] start structure for strainphlan 
tools --- tools/strainphlan/extract_markers/README.md | 18 ++++++ tools/strainphlan/strainphlan/README.md | 69 +++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 tools/strainphlan/extract_markers/README.md create mode 100644 tools/strainphlan/strainphlan/README.md diff --git a/tools/strainphlan/extract_markers/README.md b/tools/strainphlan/extract_markers/README.md new file mode 100644 index 0000000..3731821 --- /dev/null +++ b/tools/strainphlan/extract_markers/README.md @@ -0,0 +1,18 @@ +# extract_markers.py for strainphlan + +This step will extract the markers of selected species from the MetaPhlAn database. + +### Help section + +``` +usage: extract_markers.py [-h] [-d DATABASE] [-c CLADE] [-o OUTPUT_DIR] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn dtabase + -c CLADE, --clade CLADE + The clades to investigate + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory +``` diff --git a/tools/strainphlan/strainphlan/README.md b/tools/strainphlan/strainphlan/README.md new file mode 100644 index 0000000..9def282 --- /dev/null +++ b/tools/strainphlan/strainphlan/README.md @@ -0,0 +1,69 @@ +# strainphlan + +This step will build the multiple sequence alignment and the phylogenetic tree for each species. 
+ +### Help section + +``` +usage: strainphlan [-h] [-d DATABASE] [-m CLADE_MARKERS] + [-s SAMPLES [SAMPLES ...]] [-r REFERENCES [REFERENCES ...]] + [-c CLADE] [-o OUTPUT_DIR] [-n NPROCS] + [--secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...]] + [--secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...]] + [--trim_sequences TRIM_SEQUENCES] + [--marker_in_n_samples MARKER_IN_N_SAMPLES] + [--sample_with_n_markers SAMPLE_WITH_N_MARKERS] + [--secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS] + [--phylophlan_mode {accurate,fast}] + [--phylophlan_configuration PHYLOPHLAN_CONFIGURATION] + [--mutation_rates] [--print_clades_only] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn 3.0 database (default: /pasteur/so + nic/homes/kehillio/miniconda3/envs/mpa/lib/python3.7/s + ite-packages/metaphlan/metaphlan_databases/mpa_v30_CHO + COPhlAn_201901.pkl) + -m CLADE_MARKERS, --clade_markers CLADE_MARKERS + The clade markers as FASTA file (default: None) + -s SAMPLES [SAMPLES ...], --samples SAMPLES [SAMPLES ...] + The reconstructed markers for each sample (default: + []) + -r REFERENCES [REFERENCES ...], --references REFERENCES [REFERENCES ...] + The reference genomes (default: []) + -c CLADE, --clade CLADE + The clade to investigate (default: None) + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory (default: None) + -n NPROCS, --nprocs NPROCS + The number of threads to use (default: 1) + --secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...] + The reconstructed markers for each secondary sample + (default: []) + --secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...] 
+ The secondary reference genomes (default: []) + --trim_sequences TRIM_SEQUENCES + The number of bases to remove from both ends when + trimming markers (default: 50) + --marker_in_n_samples MARKER_IN_N_SAMPLES + Theshold defining the minimum percentage of samples to + keep a marker (default: 80) + --sample_with_n_markers SAMPLE_WITH_N_MARKERS + Threshold defining the minimun number of markers to + keep a sample (default: 20) + --secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS + Threshold defining the minimun number of markers to + keep a secondary sample (default: 20) + --phylophlan_mode {accurate,fast} + The presets for fast or accurate phylogenetic analysis + (default: accurate) + --phylophlan_configuration PHYLOPHLAN_CONFIGURATION + The PhyloPhlAn configuration file (default: None) + --mutation_rates If specified will produced a mutation rates table for + each of the aligned markers and a summary table for + the concatenated MSA. This operation can take long + time to finish (default: False) + --print_clades_only If specified only print the potential clades and stop + without building any tree (default: False) +``` -- GitLab From bd2e900e69eb4fc28ee1963a2ac65b1e7fad046e Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Fri, 18 Dec 2020 10:25:25 +0100 Subject: [PATCH 4/5] add rule for strainphlan --- tools/strainphlan/sample2markers/README.md | 3 +++ tools/strainphlan/strainphlan/Snakefile | 25 ++++++++++++++++++++++ tools/strainphlan/strainphlan/config.yaml | 5 +++++ 3 files changed, 33 insertions(+) create mode 100644 tools/strainphlan/strainphlan/Snakefile create mode 100644 tools/strainphlan/strainphlan/config.yaml diff --git a/tools/strainphlan/sample2markers/README.md b/tools/strainphlan/sample2markers/README.md index 8ef1f3b..78053e7 100644 --- a/tools/strainphlan/sample2markers/README.md +++ b/tools/strainphlan/sample2markers/README.md @@ -2,6 +2,9 @@ This step will reconstruct all species strains found in metaphlan output sam file 
and store them in a pickle file (*.pkl). Those strains are referred as sample-reconstructed strains. +.. **Note**: the output pkl file should be written to an individual directory since the script tries + to create and write into a `tmp` dir, which leads to errors when running sample2markers.py in parallel. + ### Help section ``` diff --git a/tools/strainphlan/strainphlan/Snakefile b/tools/strainphlan/strainphlan/Snakefile new file mode 100644 index 0000000..bf6d4ef --- /dev/null +++ b/tools/strainphlan/strainphlan/Snakefile @@ -0,0 +1,25 @@ +__strainphlan_exec_command = config.get('strainphlan', {}).get('exec_command', 'strainphlan') +__strainphlan_modules = config.get('strainphlan', {}).get('modules') +__strainphlan_options = config.get('strainphlan', {}).get('options', "") +__strainphlan_threads = config.get('strainphlan', {}).get('threads', 1) + + +rule strainphlan: + input: + __strainphlan_input + output: + __strainphlan_output + params: + exec_command = __strainphlan_exec_command, + modules = __strainphlan_modules, + clade = __strainphlan_clade, + output_dir = __strainphlan_output_dir, + options = __strainphlan_options + threads: + __strainphlan_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} -s {input} -n {threads} -c {params.clade} {params.options} -o {params.output_dir}") + shell(" && ".join(command)) diff --git a/tools/strainphlan/strainphlan/config.yaml b/tools/strainphlan/strainphlan/config.yaml new file mode 100644 index 0000000..9094c16 --- /dev/null +++ b/tools/strainphlan/strainphlan/config.yaml @@ -0,0 +1,5 @@ +input_dir: data + +strainphlan: + threads: 4 + exec_command: sample2markers.py -- GitLab From 38125af02a4147a91a1ae10e4646a7b6cfbd1fd4 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Tue, 29 Dec 2020 10:37:09 +0100 Subject: [PATCH 5/5] add workflow for strainphlan on paired-end --- workflows/strainphlan/README.md | 9 ++++ 
workflows/strainphlan/Snakefile | 78 +++++++++++++++++++++++++++++++ workflows/strainphlan/config.yaml | 31 ++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 workflows/strainphlan/README.md create mode 100644 workflows/strainphlan/Snakefile create mode 100644 workflows/strainphlan/config.yaml diff --git a/workflows/strainphlan/README.md b/workflows/strainphlan/README.md new file mode 100644 index 0000000..1bd1727 --- /dev/null +++ b/workflows/strainphlan/README.md @@ -0,0 +1,9 @@ +# StrainPhlan + +This describes a workflow to run [strainphlan](https://github.com/biobakery/MetaPhlAn/wiki/StrainPhlAn-3.0). + +In brief it contains 3 steps: + +* Run Metaphlan on all samples (paired-ends) +* Extract markers from metaphlan outputs +* Run StrainPhlan on all selected clades \ No newline at end of file diff --git a/workflows/strainphlan/Snakefile b/workflows/strainphlan/Snakefile new file mode 100644 index 0000000..f7eb095 --- /dev/null +++ b/workflows/strainphlan/Snakefile @@ -0,0 +1,78 @@ +configfile: "config.yaml" + +# ==== Snakefile paths ==== +__metaphlan3_rules = config.get("snakefiles", {}).get("metaphlan3") +__metaphlan2_merge_rules = config.get("snakefiles", {}).get("metaphlan2_merge") +__sample2markers_rules = config.get("snakefiles", {}).get("sample2markers") +__strainphlan_rules = config.get("snakefiles", {}).get("strainphlan") + +__input_dir = config['input_dir'] +__main_output_dir = config.get('output_dir', 'output') + +# ---- Metaphlan3 +__metaphlan3_output_dir = __main_output_dir + "/metaphlan3" +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_1.fq.gz") +__metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_2.fq.gz") +__metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}") +__metaphlan3_output_bowtie2out = 
"{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") +__metaphlan3_output_sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") + +include: __metaphlan3_rules + +# ---- Metaphlan2 merge +__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge" +__metaphlan2_merge_output_file_name = config['metaphlan2_merge'].get('output_file_name',"merged_taxonomic_profiles.txt") +__metaphlan2_merge_input = expand("{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}"), + sample=config['samples']) +__metaphlan2_merge_output = "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) +include: __metaphlan2_merge_rules + +rule metaphlan_merge_all: + input: + "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) + +# ---- Sample2markers ---- +__sample2markers_main_output_dir = __main_output_dir + "/sample2markers" +__sample2markers_output_dir = "{dir}/{sample}".format(dir=__sample2markers_main_output_dir, + sample="{sample}") +__sample2markers_input = __metaphlan3_output_sams +__sample2markers_output = "{dir}/{sample}.pkl".format(dir=__sample2markers_output_dir, + sample="{sample}") + +include: __sample2markers_rules + + +# ---- StrainPhlan ---- + +__strainphlan_main_output_dir = __main_output_dir + "/strainphlan" +__strainphlan_clade = "{clade}" +__strainphlan_output_dir = "{dir}/{clade}".format(dir=__strainphlan_main_output_dir, + clade="{clade}") + +__strainphlan_input = expand("{dir}/{sample}.pkl".format(dir=__sample2markers_output_dir, + sample="{sample}"), + sample=config['samples']) +__strainphlan_output = "{dir}/{prefix}.{clade}.{ext}".format(dir=__strainphlan_output_dir, + prefix="RAxML_bestTree", + clade="{clade}", + ext="StrainPhlAn3.tre") + +include: __strainphlan_rules + + +rule all: + input: + 
expand("{dir}/{prefix}.{clade}.{ext}".format(dir=__strainphlan_output_dir, + prefix="RAxML_bestTree", + clade="{clade}", + ext="StrainPhlAn3.tre"), + clade=config['strainphlan']['clades']) diff --git a/workflows/strainphlan/config.yaml b/workflows/strainphlan/config.yaml new file mode 100644 index 0000000..788a77d --- /dev/null +++ b/workflows/strainphlan/config.yaml @@ -0,0 +1,31 @@ +snakefiles: + metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + sample2markers: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/sample2markers/Snakefile + strainphlan: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/strainphlan/Snakefile + +samples: + - sample_1 + - sample_2 + - sample_2 + +input_dir: /a/path/to/input/data +output_dir: output + +metaphlan3: + threads: 4 + input_type: fastq + options: --bowtie2db /pasteur/zeus/projets/p02/metasig/DBs/metaphlan/metaphlan3bowtie2db/ --index mpa_v30_CHOCOPhlAn_201901 + +metaphlan2_merge: + threads: 1 + +sample2markers: + threads: 4 + +strainphlan: + threads: 8 + options: "" + clades: + - s__Escherichia_coli + - s__Klebsiella_pneumoniae -- GitLab