From 55bf222489879373fc948a86c0f25bcdc9013ebd Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Thu, 22 Oct 2020 16:20:37 +0200 Subject: [PATCH] Continue running all modules --- PanACoTA/subcommands/all_modules.py | 114 ++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 31 deletions(-) diff --git a/PanACoTA/subcommands/all_modules.py b/PanACoTA/subcommands/all_modules.py index 5004199c..835d2684 100644 --- a/PanACoTA/subcommands/all_modules.py +++ b/PanACoTA/subcommands/all_modules.py @@ -46,6 +46,16 @@ import sys from termcolor import colored import sys +from PanACoTA import utils +from PanACoTA.subcommands import prepare +from PanACoTA.subcommands import annotate +from PanACoTA.subcommands import pangenome +from PanACoTA.subcommands import corepers +from PanACoTA.subcommands import align +from PanACoTA.subcommands import tree +from PanACoTA import __version__ as version + + def main_from_parse(args): """ Call main function from the arguments given by parser @@ -66,13 +76,24 @@ def main(cmd, outdir, threads, NCBI_species_taxid, NCBI_species, levels, cutn, l """ Call all modules, one by one, using output of one as input for the next one """ - from PanACoTA import utils - from PanACoTA.subcommands import prepare - from PanACoTA.subcommands import annotate - from PanACoTA.subcommands import pangenome - from PanACoTA.subcommands import corepers - from PanACoTA.subcommands import align - from PanACoTA.subcommands import tree + os.makedirs(outdir, exist_ok=True) + # Initialize logger + import logging + # set level of logger: level is the minimum level that will be considered. + if verbose <= 1: + level = logging.INFO + # for verbose = 2, ignore only debug + if verbose >= 2 and verbose < 15: + level = utils.detail_lvl() # int corresponding to detail level + # for verbose >= 15, write everything + if verbose >= 15: + level = logging.DEBUG + logfile_base = os.path.join(outdir, "PanACoTA-all_modules") + logfile_base = utils.init_logger(logfile_base, level, name='all_modules', log_details=True, + verbose=verbose, quiet=quiet) + logger = logging.getLogger('all_modules') + logger.info(f'PanACoTA version {version}') + logger.info("Command used\n \t > " + cmd) # Run prepare module outdir_prepare = os.path.join(outdir, "1-prepare_module") @@ -84,26 +105,53 @@ def main(cmd, outdir, threads, NCBI_species_taxid, NCBI_species, levels, cutn, l min_dist = 1e-4 max_dist = 0.06 - prepare.main(cmd, NCBI_species, NCBI_species_taxid, levels, outdir_prepare, tmp_dir, - threads, no_refseq, db_dir, only_mash, info_file, l90, nbcont, cutn, - min_dist, max_dist, verbose, quiet) -# -> info_file - -# # Run annotate module -# list_file = "" -# db_path = "" -# outdir_annotate = os.path.join(outdir, "2-annotate_module") -# date = "" -# force = False -# qc_only = False -# tmp_dir = "" -# res_annot_dir = None -# small = False - -# annotate.main(cmd, list_file, db_path, outdir_annotate, name, date, l90, nbcont, cutn, -# threads, force, qc_only, info_file, tmp_dir, res_annot_dir, -# verbose, quiet, prodigal_only, small) - + info_file = prepare.main("PanACoTA prepare", NCBI_species, NCBI_species_taxid, levels, + outdir_prepare, tmp_dir, threads, no_refseq, db_dir, only_mash, + info_file, l90, nbcont, cutn, min_dist, max_dist, verbose, quiet) + + # Run annotate module + list_file = "" + db_path = "" + outdir_annotate = os.path.join(outdir, "2-annotate_module") + import time + date = time.strftime("%m%y") + force = False + qc_only = False + tmp_dir = "" + res_annot_dir = None + small = False + + lstinfo, nbgenomes = annotate.main("PanACoTA annotate", list_file, db_path, outdir_annotate, + name, date, l90, nbcont, cutn, threads, force, qc_only, + info_file, tmp_dir, res_annot_dir, verbose, quiet, + prodigal_only, small) + + # Pangenome step + name_pan = f"{name}_{nbgenomes}" + outdir_pan = os.path.join(outdir, "3-pangenome_module") + clust_mode = 1 + spe_dir = "" + dbpath = os.path.join(outdir_annotate, "Proteins") + panfile = pangenome.main("PanACoTA pangenome", lstinfo, name, dbpath, min_id, outdir_pan, + clust_mode, spe_dir, threads, outfile=None, verbose=verbose, + quiet=quiet) + + # Coregenome step + outdir_corpers = os.path.join(outdir, "4-corepers_module") + corepers_file = corepers.main("PanACoTA corepers", panfile, tol, multi, mixed, outdir_corpers, + verbose=verbose, quiet=quiet) + + # Align step + outdir_align = os.path.join(outdir, "5-align_module") + force = False + align_file = align.main("PanACoTA align", corepers_file, lstinfo, name_pan, outdir_annotate, + outdir_align, threads, force, verbose, quiet) + print(align_file) + + # Tree step + outdir_tree = os.path.join(outdir, "6-tree_module") + tree.main("PanACoTA tree", align_file, outdir_tree, soft, model, threads, boot=False, + write_boot=False, memory=False, fast=False, verbose=verbose, quiet=quiet) def build_parser(parser): """ @@ -163,25 +211,29 @@ def build_parser(parser): default=999, help=("Maximum number of contigs allowed to " "keep a genome. Default is 999.")) + # build_parser(prepare) + # args_prepare = parser.parse_args(argu) + # prepare.check_args(prepare_annote, args_prepare) + # OPTIONS = parse(my_parser, sys.argv[1:]) annote = parser.add_argument_group("'annotate' module arguments") annote.add_argument("--prodigal", dest="prodigal_only", action="store_true", default=False, help="Add this option if you only want syntactical annotation, given " "by prodigal, and not functional annotation requiring prokka and " "is slower.") - annote.add_argument("-n", dest="name", type=utils_argparse.gen_name, + annote.add_argument("-n", dest="name", required=True, type=utils_argparse.gen_name, help=("Choose a name for your annotated genomes. This name should " "contain 4 alphanumeric characters. Generally, they correspond " "to the 2 first letters of genus, and 2 first letters of " "species, e.g. ESCO for Escherichia Coli.")) pangenome = parser.add_argument_group("'pangenome' module arguments") - pangenome.add_argument("-i", dest="min_id", type=perc_id, default=0.8, + pangenome.add_argument("-i", dest="min_id", type=utils_argparse.perc_id, default=0.8, help=("Minimum sequence identity to be considered in the same " "cluster (float between 0 and 1). Default is 0.8.")) corepers = parser.add_argument_group("'corepers' module arguments") - corepers.add_argument("-t", "--tol", dest="tol", default=1, type=utils_argparse.percentage, + corepers.add_argument("--tol", dest="tol", type=utils_argparse.percentage, default=1, help=("min %% of genomes having at least 1 member in a family to " "consider the family as persistent (between 0 and 1, " "default is 1 = 100%% of genomes = Core genome)." @@ -204,7 +256,7 @@ def build_parser(parser): tree = parser.add_argument_group("'tree' module arguments") softs = ["fasttree", "fastme", "quicktree", "iqtree", "iqtree2"] - tree.add_argument("-s", "--soft", dest="soft", choices=softs, default="iqtree", + tree.add_argument("--soft", dest="soft", choices=softs, default="iqtree", help=("Choose with which software you want to infer the " "phylogenetic tree. Default is IQtree.")) -- GitLab