diff --git a/tools/strainphlan/extract_markers/README.md b/tools/strainphlan/extract_markers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..373182160daebbe4aadf5fee8848d999a25a8317 --- /dev/null +++ b/tools/strainphlan/extract_markers/README.md @@ -0,0 +1,18 @@ +# extract_markers.py for strainphlan + +This step will extract the markers of selected species from the MetaPhlAn database. + +### Help section + +``` +usage: extract_markers.py [-h] [-d DATABASE] [-c CLADE] [-o OUTPUT_DIR] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn database + -c CLADE, --clade CLADE + The clades to investigate + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory +``` diff --git a/tools/strainphlan/strainphlan/README.md b/tools/strainphlan/strainphlan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9def2825e4ce9f25194e863b5a743ac42bae0a77 --- /dev/null +++ b/tools/strainphlan/strainphlan/README.md @@ -0,0 +1,69 @@ +# strainphlan + +This step will build the multiple sequence alignment and the phylogenetic tree for each species. 
+ +### Help section + +``` +usage: strainphlan [-h] [-d DATABASE] [-m CLADE_MARKERS] + [-s SAMPLES [SAMPLES ...]] [-r REFERENCES [REFERENCES ...]] + [-c CLADE] [-o OUTPUT_DIR] [-n NPROCS] + [--secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...]] + [--secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...]] + [--trim_sequences TRIM_SEQUENCES] + [--marker_in_n_samples MARKER_IN_N_SAMPLES] + [--sample_with_n_markers SAMPLE_WITH_N_MARKERS] + [--secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS] + [--phylophlan_mode {accurate,fast}] + [--phylophlan_configuration PHYLOPHLAN_CONFIGURATION] + [--mutation_rates] [--print_clades_only] + +optional arguments: + -h, --help show this help message and exit + -d DATABASE, --database DATABASE + The input MetaPhlAn 3.0 database (default: /pasteur/so + nic/homes/kehillio/miniconda3/envs/mpa/lib/python3.7/s + ite-packages/metaphlan/metaphlan_databases/mpa_v30_CHO + COPhlAn_201901.pkl) + -m CLADE_MARKERS, --clade_markers CLADE_MARKERS + The clade markers as FASTA file (default: None) + -s SAMPLES [SAMPLES ...], --samples SAMPLES [SAMPLES ...] + The reconstructed markers for each sample (default: + []) + -r REFERENCES [REFERENCES ...], --references REFERENCES [REFERENCES ...] + The reference genomes (default: []) + -c CLADE, --clade CLADE + The clade to investigate (default: None) + -o OUTPUT_DIR, --output_dir OUTPUT_DIR + The output directory (default: None) + -n NPROCS, --nprocs NPROCS + The number of threads to use (default: 1) + --secondary_samples SECONDARY_SAMPLES [SECONDARY_SAMPLES ...] + The reconstructed markers for each secondary sample + (default: []) + --secondary_references SECONDARY_REFERENCES [SECONDARY_REFERENCES ...] 
+ The secondary reference genomes (default: []) + --trim_sequences TRIM_SEQUENCES + The number of bases to remove from both ends when + trimming markers (default: 50) + --marker_in_n_samples MARKER_IN_N_SAMPLES + Threshold defining the minimum percentage of samples to + keep a marker (default: 80) + --sample_with_n_markers SAMPLE_WITH_N_MARKERS + Threshold defining the minimum number of markers to + keep a sample (default: 20) + --secondary_sample_with_n_markers SECONDARY_SAMPLE_WITH_N_MARKERS + Threshold defining the minimum number of markers to + keep a secondary sample (default: 20) + --phylophlan_mode {accurate,fast} + The presets for fast or accurate phylogenetic analysis + (default: accurate) + --phylophlan_configuration PHYLOPHLAN_CONFIGURATION + The PhyloPhlAn configuration file (default: None) + --mutation_rates If specified will produce a mutation rates table for + each of the aligned markers and a summary table for + the concatenated MSA. This operation can take a long + time to finish (default: False) + --print_clades_only If specified only print the potential clades and stop + without building any tree (default: False) +```