diff --git a/tools/strainphlan/sample2markers/README.md b/tools/strainphlan/sample2markers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8ef1f3bc248135cc747226d5935e2cbd48388886 --- /dev/null +++ b/tools/strainphlan/sample2markers/README.md @@ -0,0 +1,26 @@ +# sample2markers.py for strainphlan + +This step will reconstruct all species strains found in the metaphlan output SAM file and store them in a pickle file (*.pkl). Those strains are referred to as sample-reconstructed strains. + +### Help section + +``` +usage: sample2markers.py [-h] [-i INPUT [INPUT ...]] [--sorted] + [-f INPUT_FORMAT] [-o OUTPUT_DIR] + [-b BREADTH_THRESHOLD] [-n NPROCS] + +optional arguments: + -h, --help show this help message and exit + -i INPUT [INPUT ...], --input INPUT [INPUT ...] + The input samples as SAM or BAM files + --sorted Whether the BAM input files are sorted. Default false + -f INPUT_FORMAT, --input_format INPUT_FORMAT + The input samples format {bam, sam, bz2}. Default bz2 + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory + -b BREADTH_THRESHOLD, --breadth_threshold BREADTH_THRESHOLD + The breadth of coverage threshold for the consensus + markers. 
Default 80 (%) + -n NPROCS, --nprocs NPROCS + The number of threads to execute the script +``` diff --git a/tools/strainphlan/sample2markers/Snakefile b/tools/strainphlan/sample2markers/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..c9ee84c85b4717ae1fb08db8eba516f216e1abfb --- /dev/null +++ b/tools/strainphlan/sample2markers/Snakefile @@ -0,0 +1,24 @@ +__sample2markers_exec_command = config.get('sample2markers', {}).get('exec_command', 'sample2markers.py') +__sample2markers_modules = config.get('sample2markers', {}).get('modules') +__sample2markers_options = config.get('sample2markers', {}).get('options', "") +__sample2markers_threads = config.get('sample2markers', {}).get('threads', 1) + + +rule sample2markers: + input: + __sample2markers_input + output: + dir = __sample2markers_output_dir, + file = __sample2markers_output + params: + exec_command = __sample2markers_exec_command, + modules = __sample2markers_modules, + options = __sample2markers_options + threads: + __sample2markers_threads + run: + command = [] + if params.modules: + command.append("module load {params.modules}") + command.append("{params.exec_command} -n {threads} {params.options} -i {input} -o {output.dir}") + shell(" && ".join(command)) diff --git a/tools/strainphlan/sample2markers/config.yaml b/tools/strainphlan/sample2markers/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4f25247f95f9bc37ff7125655c95ee3897935e5 --- /dev/null +++ b/tools/strainphlan/sample2markers/config.yaml @@ -0,0 +1,5 @@ +input_dir: data + +sample2markers: + threads: 1 + exec_command: sample2markers.py