From 4134975fdd59af78e52b08e207b4383fbe7a18ee Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 11:06:36 +0100 Subject: [PATCH 1/8] start metaphlan3 rules --- tools/metaphlan3/metaphlan/paired/Snakefile | 35 +++++++++++++++++++ .../metaphlan/paired/config_example.yaml | 8 +++++ tools/metaphlan3/metaphlan/single/Snakefile | 26 ++++++++++++++ .../metaphlan/single/config_example.yaml | 7 ++++ 4 files changed, 76 insertions(+) create mode 100644 tools/metaphlan3/metaphlan/paired/Snakefile create mode 100644 tools/metaphlan3/metaphlan/paired/config_example.yaml create mode 100644 tools/metaphlan3/metaphlan/single/Snakefile create mode 100644 tools/metaphlan3/metaphlan/single/config_example.yaml diff --git a/tools/metaphlan3/metaphlan/paired/Snakefile b/tools/metaphlan3/metaphlan/paired/Snakefile new file mode 100644 index 0000000..36ba92b --- /dev/null +++ b/tools/metaphlan3/metaphlan/paired/Snakefile @@ -0,0 +1,35 @@ +__metaphlan3_exec_command = config.get('metaphlan3', {}).get('exec_command', 'metaphlan') +__metaphlan3_modules = config.get('metaphlan3', {}).get('modules') +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_options = config.get('metaphlan3', {}).get('options', "") +__metaphlan3_threads = config.get('metaphlan3', {}).get('threads', 1) + + +rule metaphlan3_paired: + """ + MetaPhlAn 3 can also natively handle paired-end metagenomes (but does not use the paired-end information), + and, more generally, metagenomes stored in multiple files (but you need to specify the --bowtie2out parameter): + + $ metaphlan metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 + --input_type fastq > profiled_metagenome.txt + + """ + input: + r1 = __metaphlan3_input_r1, + r2 = __metaphlan3_input_r2 + output: + __metaphlan3_output + params: + exec_command = __metaphlan3_exec_command, + modules = __metaphlan3_modules, + input_type = __metaphlan3_input_type, + bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + options = __metaphlan3_options + threads: + __metaphlan3_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} --bowtie2out {params.bowtie2out} {params.options} {input.r1},{input.r2} {output}") + shell(" && ".join(command)) diff --git a/tools/metaphlan3/metaphlan/paired/config_example.yaml b/tools/metaphlan3/metaphlan/paired/config_example.yaml new file mode 100644 index 0000000..e484c95 --- /dev/null +++ b/tools/metaphlan3/metaphlan/paired/config_example.yaml @@ -0,0 +1,8 @@ +input_dir: data + +metaphlan3: + threads: 1 + input_type: fastq + options: "" + pair_suffix: "" + exec_command: metaphlan diff --git a/tools/metaphlan3/metaphlan/single/Snakefile b/tools/metaphlan3/metaphlan/single/Snakefile new file mode 100644 index 0000000..8189f28 --- /dev/null +++ b/tools/metaphlan3/metaphlan/single/Snakefile @@ -0,0 +1,26 @@ +__metaphlan3_exec_command = config.get('metaphlan3', {}).get('exec_command', 'metaphlan') +__metaphlan3_modules = config.get('metaphlan3', {}).get('modules') +__metaphlan3_input_type = config['metaphlan3'].get('input_type', 'fastq') +__metaphlan3_options = config.get('metaphlan3', {}).get('options', "") +__metaphlan3_threads = config.get('metaphlan3', {}).get('threads', 1) + + +rule metaphlan3: + input: + __metaphlan3_input + output: + __metaphlan3_output + params: + exec_command = __metaphlan3_exec_command, + modules = __metaphlan3_modules, + input_type = __metaphlan3_input_type, + bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + options = __metaphlan3_options + threads: + __metaphlan3_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} --bowtie2out {params.bowtie2out} {params.options} {input} {output}") + shell(" && ".join(command)) diff --git a/tools/metaphlan3/metaphlan/single/config_example.yaml b/tools/metaphlan3/metaphlan/single/config_example.yaml new file mode 100644 index 0000000..6a5c5db --- /dev/null +++ b/tools/metaphlan3/metaphlan/single/config_example.yaml @@ -0,0 +1,7 @@ +input_dir: data + +metaphlan3: + threads: 1 + input_type: fastq + options: "--bowtie2db /pasteur/projets/policy01/Atm/DBs/metaphlan/metaphlan3bowtie2db/" + exec_command: metaphlan -- GitLab From 491d5f1c240e7f77f70d54df970471e95182e95a Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 11:45:52 +0100 Subject: [PATCH 2/8] add sams output from metaphlan3 --- tools/metaphlan3/metaphlan/single/Snakefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/metaphlan3/metaphlan/single/Snakefile b/tools/metaphlan3/metaphlan/single/Snakefile index 8189f28..de7790a 100644 --- a/tools/metaphlan3/metaphlan/single/Snakefile +++ b/tools/metaphlan3/metaphlan/single/Snakefile @@ -14,7 +14,8 @@ rule metaphlan3: exec_command = __metaphlan3_exec_command, modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, - bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + bowtie2out = "{output_dir}/bowtie2/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + sams = "{output_dir}/sams/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads @@ -22,5 +23,5 @@ rule metaphlan3: command = [] if params.modules: command.append("module load {params.modules}") - command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} --bowtie2out {params.bowtie2out} {params.options} {input} {output}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {params.sams} --bowtie2out {params.bowtie2out} {params.options} {input} {output}") shell(" && ".join(command)) -- GitLab From d1e501f6cc0cf2266c8ecfd9e4794f5bfe94f18e Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 12:07:20 +0100 Subject: [PATCH 3/8] fix Snakefile --- tools/metaphlan3/metaphlan/single/Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/metaphlan3/metaphlan/single/Snakefile b/tools/metaphlan3/metaphlan/single/Snakefile index de7790a..f203083 100644 --- a/tools/metaphlan3/metaphlan/single/Snakefile +++ b/tools/metaphlan3/metaphlan/single/Snakefile @@ -14,8 +14,8 @@ rule metaphlan3: exec_command = __metaphlan3_exec_command, modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, - bowtie2out = "{output_dir}/bowtie2/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), - sams = "{output_dir}/sams/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + sams = "{output_dir}/{sample}.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads -- GitLab From 0ddcffa454ac5a3f05787bdb89e2c4c064392dd7 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 12:08:49 +0100 Subject: [PATCH 4/8] add sams output to paired metaphlan --- tools/metaphlan3/metaphlan/paired/Snakefile | 3 ++- tools/metaphlan3/metaphlan/single/Snakefile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/metaphlan3/metaphlan/paired/Snakefile b/tools/metaphlan3/metaphlan/paired/Snakefile index 36ba92b..3e41c2f 100644 --- a/tools/metaphlan3/metaphlan/paired/Snakefile +++ b/tools/metaphlan3/metaphlan/paired/Snakefile @@ -24,6 +24,7 @@ rule metaphlan3_paired: modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads @@ -31,5 +32,5 @@ rule metaphlan3_paired: command = [] if params.modules: command.append("module load {params.modules}") - command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} --bowtie2out {params.bowtie2out} {params.options} {input.r1},{input.r2} {output}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {params.sams} --bowtie2out {params.bowtie2out} {params.options} {input.r1},{input.r2} {output}") shell(" && ".join(command)) diff --git a/tools/metaphlan3/metaphlan/single/Snakefile b/tools/metaphlan3/metaphlan/single/Snakefile index f203083..45a18c2 100644 --- a/tools/metaphlan3/metaphlan/single/Snakefile +++ b/tools/metaphlan3/metaphlan/single/Snakefile @@ -15,7 +15,7 @@ rule metaphlan3: modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), - sams = "{output_dir}/{sample}.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), + sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads -- GitLab From 6cad6524765c802fe7424119be81694727672a9b Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 15:00:53 +0100 Subject: [PATCH 5/8] move bowtie2out and sams from params to output --- tools/metaphlan3/metaphlan/single/Snakefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/metaphlan3/metaphlan/single/Snakefile b/tools/metaphlan3/metaphlan/single/Snakefile index 45a18c2..3e86bed 100644 --- a/tools/metaphlan3/metaphlan/single/Snakefile +++ b/tools/metaphlan3/metaphlan/single/Snakefile @@ -9,13 +9,13 @@ rule metaphlan3: input: __metaphlan3_input output: - __metaphlan3_output + profile = __metaphlan3_output_profile, + bowtie2out = __metaphlan3_output_bowtie2out, + sams = __metaphlan3_output_sams params: exec_command = __metaphlan3_exec_command, modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, - bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), - sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads @@ -23,5 +23,5 @@ rule metaphlan3: command = [] if params.modules: command.append("module load {params.modules}") - command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {params.sams} --bowtie2out {params.bowtie2out} {params.options} {input} {output}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {output.sams} --bowtie2out {output.bowtie2out} {params.options} {input} {output.profile}") shell(" && ".join(command)) -- GitLab From e60c44c96917cc1cae0ce933e4895c68f9208900 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 15:04:56 +0100 Subject: [PATCH 6/8] update paired metaphlan with new outputs --- tools/metaphlan3/metaphlan/paired/Snakefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/metaphlan3/metaphlan/paired/Snakefile b/tools/metaphlan3/metaphlan/paired/Snakefile index 3e41c2f..5517590 100644 --- a/tools/metaphlan3/metaphlan/paired/Snakefile +++ b/tools/metaphlan3/metaphlan/paired/Snakefile @@ -18,13 +18,13 @@ rule metaphlan3_paired: r1 = __metaphlan3_input_r1, r2 = __metaphlan3_input_r2 output: - __metaphlan3_output + profile = __metaphlan3_output, + bowtie2out = __metaphlan3_output_bowtie2out, + sams = __metaphlan3_output_sams params: exec_command = __metaphlan3_exec_command, modules = __metaphlan3_modules, input_type = __metaphlan3_input_type, - bowtie2out = "{output_dir}/{sample}.bowtie2.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), - sams = "{output_dir}/{sample}.sam.bz2".format(output_dir=__metaphlan3_output_dir, sample="{sample}"), options = __metaphlan3_options threads: __metaphlan3_threads @@ -32,5 +32,5 @@ rule metaphlan3_paired: command = [] if params.modules: command.append("module load {params.modules}") - command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {params.sams} --bowtie2out {params.bowtie2out} {params.options} {input.r1},{input.r2} {output}") + command.append("{params.exec_command} --nproc {threads} --input_type {params.input_type} -s {output.sams} --bowtie2out {output.bowtie2out} {params.options} {input.r1},{input.r2} {output.profile}") shell(" && ".join(command)) -- GitLab From 3bd54e46df22ec55541c8b16cf82f4c7f241b1fe Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 15:19:52 +0100 Subject: [PATCH 7/8] add sample2markers for strainphlan --- tools/strainphlan/sample2markers/README.md | 26 ++++++++++++++++++++ tools/strainphlan/sample2markers/Snakefile | 24 ++++++++++++++++++ tools/strainphlan/sample2markers/config.yaml | 5 ++++ 3 files changed, 55 insertions(+) create mode 100644 tools/strainphlan/sample2markers/README.md create mode 100644 tools/strainphlan/sample2markers/Snakefile create mode 100644 tools/strainphlan/sample2markers/config.yaml diff --git a/tools/strainphlan/sample2markers/README.md b/tools/strainphlan/sample2markers/README.md new file mode 100644 index 0000000..8ef1f3b --- /dev/null +++ b/tools/strainphlan/sample2markers/README.md @@ -0,0 +1,26 @@ +# sample2markers.py for strainphlan + +This step will reconstruct all species strains found in metaphlan output sam file and store them in a pickle file (*.pkl). Those strains are referred as sample-reconstructed strains. + +### Help section + +``` +usage: sample2markers.py [-h] [-i INPUT [INPUT ...]] [--sorted] + [-f INPUT_FORMAT] [-o OUTPUT_DIR] + [-b BREADTH_THRESHOLD] [-n NPROCS] + +optional arguments: + -h, --help show this help message and exit + -i INPUT [INPUT ...], --input INPUT [INPUT ...] + The input samples as SAM or BAM files + --sorted Whether the BAM input files are sorted. Default false + -f INPUT_FORMAT, --input_format INPUT_FORMAT + The input samples format {bam, sam, bz2}. Default bz2 + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory + -b BREADTH_THRESHOLD, --breadth_threshold BREADTH_THRESHOLD + The breadth of coverage threshold for the consensus + markers. Default 80 (%) + -n NPROCS, --nprocs NPROCS + The number of threads to execute the script +``` diff --git a/tools/strainphlan/sample2markers/Snakefile b/tools/strainphlan/sample2markers/Snakefile new file mode 100644 index 0000000..c9ee84c --- /dev/null +++ b/tools/strainphlan/sample2markers/Snakefile @@ -0,0 +1,24 @@ +__sample2markers_exec_command = config.get('sample2markers', {}).get('exec_command', 'sample2markers.py') +__sample2markers_modules = config.get('sample2markers', {}).get('modules') +__sample2markers_options = config.get('sample2markers', {}).get('options', "") +__sample2markers_threads = config.get('sample2markers', {}).get('threads', 1) + + +rule sample2markers: + input: + __sample2markers_input + output: + dir = __sample2markers_output_dir, + file = __sample2markers_output + params: + exec_command = __sample2markers_exec_command, + modules = __sample2markers_modules, + options = __sample2markers_options + threads: + __sample2markers_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} -n {threads} {params.options} -i {input} -o {output.dir}") + shell(" && ".join(command)) diff --git a/tools/strainphlan/sample2markers/config.yaml b/tools/strainphlan/sample2markers/config.yaml new file mode 100644 index 0000000..a4f2524 --- /dev/null +++ b/tools/strainphlan/sample2markers/config.yaml @@ -0,0 +1,5 @@ +input_dir: data + +sample2markers: + threads: 1 + exec_command: sample2markers.py -- GitLab From b265ed7fe58200c4b9551d09a2b35ac9cca65f91 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion Date: Thu, 19 Nov 2020 15:28:19 +0100 Subject: [PATCH 8/8] mv output dir to params --- tools/strainphlan/sample2markers/Snakefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/strainphlan/sample2markers/Snakefile b/tools/strainphlan/sample2markers/Snakefile index c9ee84c..a577299 100644 --- a/tools/strainphlan/sample2markers/Snakefile +++ b/tools/strainphlan/sample2markers/Snakefile @@ -8,11 +8,11 @@ rule sample2markers: input: __sample2markers_input output: - dir = __sample2markers_output_dir, - file = __sample2markers_output + __sample2markers_output params: exec_command = __sample2markers_exec_command, modules = __sample2markers_modules, + output_dir = __sample2markers_output_dir, options = __sample2markers_options threads: __sample2markers_threads @@ -20,5 +20,5 @@ rule sample2markers: command = [] if params.modules: command.append("module load {params.modules}") - command.append("{params.exec_command} -n {threads} {params.options} -i {input} -o {output.dir}") + command.append("{params.exec_command} -n {threads} {params.options} -i {input} -o {params.output_dir}") shell(" && ".join(command)) -- GitLab