diff --git a/tools/eggnogmapper2/annotate/example_usage/config.yaml b/tools/eggnogmapper2/annotate/example_usage/config.yaml index c7db75de9db3189ffa55b7a30aeb8813d1c34708..8e808da3b2b881ad7bb0afd4c8f178958c33c116 100644 --- a/tools/eggnogmapper2/annotate/example_usage/config.yaml +++ b/tools/eggnogmapper2/annotate/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - eggnogmapper2_annotate: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/annotate/Snakefile + eggnogmapper2_annotate: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/annotate/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/eggnogmapper2/diamond/ output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/eggnogmapper2/diamond/example_usage/config.yaml b/tools/eggnogmapper2/diamond/example_usage/config.yaml index 5fe001f10aaab0b262680238247ff6684e7a9f5f..1db91027434c1c5e55d0f5d25f6ab5f9131b07b3 100644 --- a/tools/eggnogmapper2/diamond/example_usage/config.yaml +++ b/tools/eggnogmapper2/diamond/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - eggnogmapper2_diamond: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/diamond/Snakefile + eggnogmapper2_diamond: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/diamond/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/split_fasta output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/metaphlan3/metaphlan/paired/Snakefile b/tools/metaphlan3/metaphlan/paired/Snakefile index 55175901ad8cbe8aeb75453808d1b82196aa7810..87f4be7df06dcb1039952d068ae2e1d81500f668 100644 --- a/tools/metaphlan3/metaphlan/paired/Snakefile +++ b/tools/metaphlan3/metaphlan/paired/Snakefile @@ -18,7 +18,7 @@ rule metaphlan3_paired: r1 = __metaphlan3_input_r1, r2 = __metaphlan3_input_r2 output: - profile = 
__metaphlan3_output, + profile = __metaphlan3_output_profile, bowtie2out = __metaphlan3_output_bowtie2out, sams = __metaphlan3_output_sams params: diff --git a/tools/strainphlan/extract_markers/README.md b/tools/strainphlan/extract_markers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..373182160daebbe4aadf5fee8848d999a25a8317 --- /dev/null +++ b/tools/strainphlan/extract_markers/README.md @@ -0,0 +1,18 @@ +# extract_markers.py for strainphlan + +This step will extract the markers of selected species from the MetaPhlAn database. + +### Help section + +``` +usage: extract_markers.py [-h] [-d DATABASE] [-c CLADE] [-o OUTPUT_DIR] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn dtabase + -c CLADE, --clade CLADE + The clades to investigate + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory +``` diff --git a/tools/strainphlan/sample2markers/README.md b/tools/strainphlan/sample2markers/README.md index 8ef1f3bc248135cc747226d5935e2cbd48388886..78053e7abaa77d06229244c7f000712d9b8885dd 100644 --- a/tools/strainphlan/sample2markers/README.md +++ b/tools/strainphlan/sample2markers/README.md @@ -2,6 +2,9 @@ This step will reconstruct all species strains found in metaphlan output sam file and store them in a pickle file (*.pkl). Those strains are referred as sample-reconstructed strains. +**Note**: the output pkl file should be written to its own directory, since the script tries + to create and write into a `tmp` dir, which leads to errors when running sample2markers.py in parallel. 
+ ### Help section ``` diff --git a/tools/strainphlan/strainphlan/README.md b/tools/strainphlan/strainphlan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9def2825e4ce9f25194e863b5a743ac42bae0a77 --- /dev/null +++ b/tools/strainphlan/strainphlan/README.md @@ -0,0 +1,69 @@ +# strainphlan + +This step will build the multiple sequence alignment and the phylogenetic tree for each species. + +### Help section + +``` +usage: strainphlan [-h] [-d DATABASE] [-m CLADE_MARKERS] + [-s SAMPLES [SAMPLES ...]] [-r REFERENCES [REFERENCES ...]] + [-c CLADE] [-o OUTPUT_DIR] [-n NPROCS] + [--secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...]] + [--secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...]] + [--trim_sequences TRIM_SEQUENCES] + [--marker_in_n_samples MARKER_IN_N_SAMPLES] + [--sample_with_n_markers SAMPLE_WITH_N_MARKERS] + [--secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS] + [--phylophlan_mode {accurate,fast}] + [--phylophlan_configuration PHYLOPHLAN_CONFIGURATION] + [--mutation_rates] [--print_clades_only] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn 3.0 database (default: /pasteur/so + nic/homes/kehillio/miniconda3/envs/mpa/lib/python3.7/s + ite-packages/metaphlan/metaphlan_databases/mpa_v30_CHO + COPhlAn_201901.pkl) + -m CLADE_MARKERS, --clade_markers CLADE_MARKERS + The clade markers as FASTA file (default: None) + -s SAMPLES [SAMPLES ...], --samples SAMPLES [SAMPLES ...] + The reconstructed markers for each sample (default: + []) + -r REFERENCES [REFERENCES ...], --references REFERENCES [REFERENCES ...] 
+ The reference genomes (default: []) + -c CLADE, --clade CLADE + The clade to investigate (default: None) + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory (default: None) + -n NPROCS, --nprocs NPROCS + The number of threads to use (default: 1) + --secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...] + The reconstructed markers for each secondary sample + (default: []) + --secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...] + The secondary reference genomes (default: []) + --trim_sequences TRIM_SEQUENCES + The number of bases to remove from both ends when + trimming markers (default: 50) + --marker_in_n_samples MARKER_IN_N_SAMPLES + Theshold defining the minimum percentage of samples to + keep a marker (default: 80) + --sample_with_n_markers SAMPLE_WITH_N_MARKERS + Threshold defining the minimun number of markers to + keep a sample (default: 20) + --secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS + Threshold defining the minimun number of markers to + keep a secondary sample (default: 20) + --phylophlan_mode {accurate,fast} + The presets for fast or accurate phylogenetic analysis + (default: accurate) + --phylophlan_configuration PHYLOPHLAN_CONFIGURATION + The PhyloPhlAn configuration file (default: None) + --mutation_rates If specified will produced a mutation rates table for + each of the aligned markers and a summary table for + the concatenated MSA. 
This operation can take long + time to finish (default: False) + --print_clades_only If specified only print the potential clades and stop + without building any tree (default: False) +``` diff --git a/tools/strainphlan/strainphlan/Snakefile b/tools/strainphlan/strainphlan/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..bf6d4effd2eb379e47496aa2666a4ce357421b2e --- /dev/null +++ b/tools/strainphlan/strainphlan/Snakefile @@ -0,0 +1,25 @@ +__strainphlan_exec_command = config.get('strainphlan', {}).get('exec_command', 'strainphlan') +__strainphlan_modules = config.get('strainphlan', {}).get('modules') +__strainphlan_options = config.get('strainphlan', {}).get('options', "") +__strainphlan_threads = config.get('strainphlan', {}).get('threads', 1) + + +rule strainphlan: + input: + __strainphlan_input + output: + __strainphlan_output + params: + exec_command = __strainphlan_exec_command, + modules = __strainphlan_modules, + clade = __strainphlan_clade, + output_dir = __strainphlan_output_dir, + options = __strainphlan_options + threads: + __strainphlan_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} -s {input} -n {threads} -c {params.clade} {params.options} -o {params.output_dir}") + shell(" && ".join(command)) diff --git a/tools/strainphlan/strainphlan/config.yaml b/tools/strainphlan/strainphlan/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9094c16763ca77b1005040ca87affe372d2f64e7 --- /dev/null +++ b/tools/strainphlan/strainphlan/config.yaml @@ -0,0 +1,5 @@ +input_dir: data + +strainphlan: + threads: 4 + exec_command: strainphlan diff --git a/tools/utils/cat/example_usage/config.yaml b/tools/utils/cat/example_usage/config.yaml index b0d0444ad73e58f3c1e6ebdde07204907fb38d95..fc1b43bf52c773c5974c1461092232cc4382b753 100644 --- a/tools/utils/cat/example_usage/config.yaml +++ 
b/tools/utils/cat/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - cat: /pasteur/projets/policy01/Atm/snakemake/tools/utils/cat/Snakefile + cat: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/cat/Snakefile input_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output/eggnogmapper2/diamond/ output_dir: /pasteur/projets/policy01/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/tools/utils/split_fasta/example_usage/config.yaml b/tools/utils/split_fasta/example_usage/config.yaml index 559a78914eed47dc84b3382b4e651ad793500dcb..6b47f0307f8fd3e60d0f03be8a11023c5d198d6e 100644 --- a/tools/utils/split_fasta/example_usage/config.yaml +++ b/tools/utils/split_fasta/example_usage/config.yaml @@ -1,5 +1,5 @@ snakefiles: - split_fasta: /pasteur/projets/policy01/Atm/snakemake/tools/utils/split_fasta/Snakefile + split_fasta: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/split_fasta/Snakefile input_fasta: /pasteur/projets/policy01/DBs/IGC/2014-9.9M/IGC.fa output_dir: /pasteur/projets/policy01/sandbox/20200210_test_snakemake/output diff --git a/workflows/eggnogmapperv2/config.yaml b/workflows/eggnogmapperv2/config.yaml index 10bd4b1ef1e47f342d64ab38b703e4c292be6d56..4e3f44d53646c5bd929560746f479956a1684b73 100644 --- a/workflows/eggnogmapperv2/config.yaml +++ b/workflows/eggnogmapperv2/config.yaml @@ -1,8 +1,8 @@ snakefiles: - eggnogmapper2_diamond: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/diamond/Snakefile - split_fasta: /pasteur/projets/policy01/Atm/snakemake/tools/utils/split_fasta/Snakefile - cat: /pasteur/projets/policy01/Atm/snakemake/tools/utils/cat/Snakefile - eggnogmapper2_annotate: /pasteur/projets/policy01/Atm/snakemake/tools/eggnogmapper2/annotate/Snakefile + eggnogmapper2_diamond: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/diamond/Snakefile + split_fasta: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/split_fasta/Snakefile 
+ cat: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/utils/cat/Snakefile + eggnogmapper2_annotate: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/eggnogmapper2/annotate/Snakefile input_fasta: /pasteur/homes/kehillio/Atm/kenzo/sandbox/20200210_test_snakemake/test.fa output_dir: /pasteur/homes/kehillio/Atm/kenzo/sandbox/20200210_test_snakemake/test_output diff --git a/workflows/metaphlan2/paired_metaphlan2/config.yaml b/workflows/metaphlan2/paired_metaphlan2/config.yaml index 21b4cbde2f02258dc4727f8c7fb95bf64978f675..077607ac5800b5d0d208ed5a185ae1c577b5a09f 100644 --- a/workflows/metaphlan2/paired_metaphlan2/config.yaml +++ b/workflows/metaphlan2/paired_metaphlan2/config.yaml @@ -1,8 +1,8 @@ snakefiles: - metaphlan2: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2/paired/Snakefile - metaphlan2_merge: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile - metaphlan2_heatmap: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile - graphlan_from_metaphlan2: /pasteur/projets/policy01/Atm/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile samples: - sample_1 diff --git a/workflows/metaphlan2/single_metaphlan2/config.yaml b/workflows/metaphlan2/single_metaphlan2/config.yaml index 6d7b5bf0d573cee970a0122ca45ad7b0ea941363..3d5dccc54ba33a3ec9e846d4dcd2812b383ea610 100644 --- a/workflows/metaphlan2/single_metaphlan2/config.yaml +++ b/workflows/metaphlan2/single_metaphlan2/config.yaml @@ -1,8 +1,8 @@ 
snakefiles: - metaphlan2: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2/single/Snakefile - metaphlan2_merge: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile - metaphlan2_heatmap: /pasteur/projets/policy01/Atm/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile - graphlan_from_metaphlan2: /pasteur/projets/policy01/Atm/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2/single/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile samples: - sample_1 diff --git a/workflows/metaphlan3/README.md b/workflows/metaphlan3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d1dc622811d5afaaa9d0b68b0fe592de77a0cfbb --- /dev/null +++ b/workflows/metaphlan3/README.md @@ -0,0 +1,8 @@ +# Simple metaphlan3 workflows + +Workflows using metaphlan3 and simple visualization of the results. + +All examples presented were made for our TARS cluster system. This means you will be likely to find some +absolute path into the `config.yaml` that you might not have access to. + +For every workflow, an example is provided and is based on the `config.yaml` file. Singularity images are necessary for these examples. 
diff --git a/workflows/metaphlan3/paired_metaphlan2/Snakefile b/workflows/metaphlan3/paired_metaphlan2/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..eecd4a4be184e070d0ac47a1ccced07e3eb9dafb --- /dev/null +++ b/workflows/metaphlan3/paired_metaphlan2/Snakefile @@ -0,0 +1,56 @@ +configfile: "config.yaml" + +# ==== Snakefile paths ==== +__metaphlan3_rules = config.get("snakefiles", {}).get("metaphlan3", "../../tools/metaphlan3/metaphlan/paired/Snakefile") +__metaphlan2_merge_rules = config.get("snakefiles", {}).get("metaphlan2_merge", "../../tools/metaphlan2/metaphlan2_merge/Snakefile") +__metaphlan2_heatmap_rules = config.get("snakefiles", {}).get("metaphlan2_heatmap", "../../tools/metaphlan2/metaphlan2_heatmap/Snakefile") +__graphlan_from_metaphlan2_rules = config.get("snakefiles", {}).get("graphlan_from_metaphlan2", "../subworkflows/graphlan_from_metaphlan2/Snakefile") + +__input_dir = config['input_dir'] +__main_output_dir = config.get('output_dir', 'output') + +# ---- Metaphlan3 +__metaphlan3_output_dir = __main_output_dir + "/metaphlan3" +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_1.fq.gz") +__metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_2.fq.gz") +__metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}") +__metaphlan3_output_bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") +__metaphlan3_output_sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") + +include: __metaphlan3_rules + +# ---- Metaphlan2 merge +__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge" +__metaphlan2_merge_output_file_name = 
config['metaphlan2_merge'].get('output_file_name',"merged_taxonomic_profiles.txt") +__metaphlan2_merge_input = expand(__metaphlan3_output_profile, sample=config['samples']) +__metaphlan2_merge_output = "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) +include: __metaphlan2_merge_rules + +# ---- Metaphlan2 heatmap +__metaphlan2_heatmap_output_dir = __main_output_dir + "/metaphlan2_heatmap" +__metaphlan2_heatmap_output_file_name = config['metaphlan2_heatmap'].get('output_name',"heatmap.png") +__metaphlan2_heatmap_input = __metaphlan2_merge_output +__metaphlan2_heatmap_output = "{dir}/{file_name}".format(dir=__metaphlan2_heatmap_output_dir, + file_name=__metaphlan2_heatmap_output_file_name) +include: __metaphlan2_heatmap_rules + +# ---- Graphlan Dendrogram +__graphlan_from_metaphlan2_output_dir = __main_output_dir + "/graphlan" +__graphlan_from_metaphlan2_output_file_name = config.get("graphlan_from_metaphlan2", {}).get('output_name',"dendrogram.png") +__graphlan_from_metaphlan2_input = __metaphlan2_merge_output +__graphlan_from_metaphlan2_output = "{dir}/{file_name}".format(dir=__graphlan_from_metaphlan2_output_dir, + file_name=__graphlan_from_metaphlan2_output_file_name) +include: __graphlan_from_metaphlan2_rules + +rule all: + input: + heatmap = __metaphlan2_heatmap_output, + dendogram = __graphlan_from_metaphlan2_output + diff --git a/workflows/metaphlan3/paired_metaphlan2/config.yaml b/workflows/metaphlan3/paired_metaphlan2/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f6af03fe172edc2f49883dc7477e760b3a52646 --- /dev/null +++ b/workflows/metaphlan3/paired_metaphlan2/config.yaml @@ -0,0 +1,43 @@ +snakefiles: + metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + metaphlan2_heatmap: 
/pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_heatmap/Snakefile + graphlan_from_metaphlan2: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/subworkflows/graphlan_from_metaphlan2/Snakefile + +samples: + - sample_1 + - sample_2 + - sample_3 + +input_dir: /a/path/to/input/data +output_dir: metaphlan2_output + +metaphlan3: + modules: singularity + threads: 4 + input_type: fastq + options: --bowtie2db /pasteur/gaia/projets/p01/Atm/DBs/bowtie2/metaphlan2/ + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.7.7_s3.2.1.simg metaphlan2.py + +metaphlan2_merge: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.7.7_s3.2.1.simg merge_metaphlan_tables.py + +metaphlan2_heatmap: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/metaphlan2/from_docker/metaphlan2_2.6.0_s3.2.1.simg metaphlan_hclust_heatmap.py + output_name: snakemake_heatmap.png + +export2graphlan: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg export2graphlan.py + options: "--skip_rows 1,2 --most_abundant 100 --abundance_threshold 1 --least_biomarkers 10 --annotations 5,6 --external_annotations 7 --min_clade_size 1" + +graphlan_annotate: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg graphlan_annotate.py + +graphlan: + modules: singularity + exec_command: singularity exec --bind /pasteur/ /pasteur/gaia/projets/p01/Atm/singularity/tools/graphlan/from_docker/graphlan_0.9.7_s3.2.1.simg graphlan.py + options: "--dpi 300 --external_legends" diff --git a/workflows/strainphlan/README.md 
b/workflows/strainphlan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1bd172722b8e94154abf164bd65b4a540883bf9e --- /dev/null +++ b/workflows/strainphlan/README.md @@ -0,0 +1,9 @@ +# StrainPhlan + +This describes a workflow to run [strainphlan](https://github.com/biobakery/MetaPhlAn/wiki/StrainPhlAn-3.0) + +In brief it contains the following steps: + +* Run Metaphlan on all samples (paired-ends) +* Extract markers from metaphlan outputs +* Run StrainPhlan on all selected clades \ No newline at end of file diff --git a/workflows/strainphlan/Snakefile b/workflows/strainphlan/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..f7eb095548c619bebdecbea7b0a7356b4a260726 --- /dev/null +++ b/workflows/strainphlan/Snakefile @@ -0,0 +1,78 @@ +configfile: "config.yaml" + +# ==== Snakefile paths ==== +__metaphlan3_rules = config.get("snakefiles", {}).get("metaphlan3") +__metaphlan2_merge_rules = config.get("snakefiles", {}).get("metaphlan2_merge") +__sample2markers_rules = config.get("snakefiles", {}).get("sample2markers") +__strainphlan_rules = config.get("snakefiles", {}).get("strainphlan") + +__input_dir = config['input_dir'] +__main_output_dir = config.get('output_dir', 'output') + +# ---- Metaphlan3 +__metaphlan3_output_dir = __main_output_dir + "/metaphlan3" +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_input_r1 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_1.fq.gz") +__metaphlan3_input_r2 = "{dir}/{sample}{ext}".format(dir=__input_dir, + sample="{sample}", + ext="_2.fq.gz") +__metaphlan3_output_profile = "{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}") +__metaphlan3_output_bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") +__metaphlan3_output_sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}") + +include: 
__metaphlan3_rules + +# ---- Metaphlan2 merge +__metaphlan2_merge_output_dir = __main_output_dir + "/metaphlan_merge" +__metaphlan2_merge_output_file_name = config['metaphlan2_merge'].get('output_file_name',"merged_taxonomic_profiles.txt") +__metaphlan2_merge_input = expand("{dir}/{sample}.profile.tsv".format(dir=__metaphlan3_output_dir, + sample="{sample}"), + sample=config['samples']) +__metaphlan2_merge_output = "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) +include: __metaphlan2_merge_rules + +rule metaphlan_merge_all: + input: + "{dir}/{file_name}".format(dir=__metaphlan2_merge_output_dir, + file_name=__metaphlan2_merge_output_file_name) + +# ---- Sample2markers ---- +__sample2markers_main_output_dir = __main_output_dir + "/sample2markers" +__sample2markers_output_dir = "{dir}/{sample}".format(dir=__sample2markers_main_output_dir, + sample="{sample}") +__sample2markers_input = __metaphlan3_output_sams +__sample2markers_output = "{dir}/{sample}.pkl".format(dir=__sample2markers_output_dir, + sample="{sample}") + +include: __sample2markers_rules + + +# ---- StrainPhlan ---- + +__strainphlan_main_output_dir = __main_output_dir + "/strainphlan" +__strainphlan_clade = "{clade}" +__strainphlan_output_dir = "{dir}/{clade}".format(dir=__strainphlan_main_output_dir, + clade="{clade}") + +__strainphlan_input = expand("{dir}/{sample}.pkl".format(dir=__sample2markers_output_dir, + sample="{sample}"), + sample=config['samples']) +__strainphlan_output = "{dir}/{prefix}.{clade}.{ext}".format(dir=__strainphlan_output_dir, + prefix="RAxML_bestTree", + clade="{clade}", + ext="StrainPhlAn3.tre") + +include: __strainphlan_rules + + +rule all: + input: + expand("{dir}/{prefix}.{clade}.{ext}".format(dir=__strainphlan_output_dir, + prefix="RAxML_bestTree", + clade="{clade}", + ext="StrainPhlAn3.tre"), + clade=config['strainphlan']['clades']) diff --git a/workflows/strainphlan/config.yaml 
b/workflows/strainphlan/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..788a77d9f2627a9c54b18ca6c6e95102ea813210 --- /dev/null +++ b/workflows/strainphlan/config.yaml @@ -0,0 +1,31 @@ +snakefiles: + metaphlan3: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan3/metaphlan/paired/Snakefile + metaphlan2_merge: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/metaphlan2/metaphlan2_merge/Snakefile + sample2markers: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/sample2markers/Snakefile + strainphlan: /pasteur/zeus/projets/p02/metasig/gitlab/snakemake/tools/strainphlan/strainphlan/Snakefile + +samples: + - sample_1 + - sample_2 + - sample_3 + +input_dir: /a/path/to/input/data +output_dir: output + +metaphlan3: + threads: 4 + input_type: fastq + options: --bowtie2db /pasteur/zeus/projets/p02/metasig/DBs/metaphlan/metaphlan3bowtie2db/ --index mpa_v30_CHOCOPhlAn_201901 + +metaphlan2_merge: + threads: 1 + +sample2markers: + threads: 4 + +strainphlan: + threads: 8 + options: "" + clades: + - s__Escherichia_coli + - s__Klebsiella_pneumoniae