Continue running all modules

55bf2224 · Amandine PERRIN · 4678f364 · 55bf2224
Commit 55bf2224 authored 4 years ago by Amandine PERRIN
--- a/PanACoTA/subcommands/all_modules.py
+++ b/PanACoTA/subcommands/all_modules.py
@@ -46,6 +46,16 @@ import sys
 from termcolor import colored
 import sys
+from PanACoTA import utils
+from PanACoTA.subcommands import prepare
+from PanACoTA.subcommands import annotate
+from PanACoTA.subcommands import pangenome
+from PanACoTA.subcommands import corepers
+from PanACoTA.subcommands import align
+from PanACoTA.subcommands import tree
+from PanACoTA import __version__ as version
 def main_from_parse(args):
    """
    Call main function from the arguments given by parser
@@ -66,13 +76,24 @@ def main(cmd, outdir, threads, NCBI_species_taxid, NCBI_species, levels, cutn, l
    """
    Call all modules, one by one, using output of one as input for the next one
    """
-    from PanACoTA import utils
+    os.makedirs(outdir, exist_ok=True)
-    from PanACoTA.subcommands import prepare
+    # Initialize logger
-    from PanACoTA.subcommands import annotate
+    import logging
-    from PanACoTA.subcommands import pangenome
+    # set level of logger: level is the minimum level that will be considered.
-    from PanACoTA.subcommands import corepers
+    if verbose <= 1:
-    from PanACoTA.subcommands import align
+        level = logging.INFO
-    from PanACoTA.subcommands import tree
+    # for 2 <= verbose < 15, ignore only debug
+    if verbose >= 2 and verbose < 15:
+        level = utils.detail_lvl() # int corresponding to detail level
+    # for verbose >= 15, write everything
+    if verbose >= 15:
+        level = logging.DEBUG
+    logfile_base = os.path.join(outdir, "PanACoTA-all_modules")
+    logfile_base = utils.init_logger(logfile_base, level, name='all_modules', log_details=True,
+                                     verbose=verbose, quiet=quiet)
+    logger = logging.getLogger('all_modules')
+    logger.info(f'PanACoTA version {version}')
+    logger.info("Command used\n \t > " + cmd)
    # Run prepare module
    outdir_prepare = os.path.join(outdir, "1-prepare_module")
@@ -84,26 +105,53 @@ def main(cmd, outdir, threads, NCBI_species_taxid, NCBI_species, levels, cutn, l
    min_dist = 1e-4
    max_dist = 0.06
-    prepare.main(cmd, NCBI_species, NCBI_species_taxid, levels, outdir_prepare, tmp_dir,
+    info_file = prepare.main("PanACoTA prepare", NCBI_species, NCBI_species_taxid, levels,
-                 threads, no_refseq, db_dir, only_mash, info_file, l90, nbcont, cutn,
+                             outdir_prepare, tmp_dir, threads, no_refseq, db_dir, only_mash,
-                 min_dist, max_dist, verbose, quiet)
+                             info_file, l90, nbcont, cutn, min_dist, max_dist, verbose, quiet)
-# -> info_file
+    # Run annotate module
-#     # Run annotate module
+    list_file = ""
-#     list_file = ""
+    db_path = ""
-#     db_path = ""
+    outdir_annotate = os.path.join(outdir, "2-annotate_module")
-#     outdir_annotate = os.path.join(outdir, "2-annotate_module")
+    import time
-#     date = ""
+    date = time.strftime("%m%y")
-#     force = False
+    force = False
-#     qc_only = False
+    qc_only = False
-#     tmp_dir = ""
+    tmp_dir = ""
-#     res_annot_dir = None
+    res_annot_dir = None
-#     small = False
+    small = False
-#     annotate.main(cmd, list_file, db_path, outdir_annotate, name, date, l90, nbcont, cutn,
+    lstinfo, nbgenomes = annotate.main("PanACoTA annotate", list_file, db_path, outdir_annotate,
-#                   threads, force, qc_only, info_file, tmp_dir, res_annot_dir,
+                                       name, date, l90, nbcont, cutn, threads, force, qc_only,
-#                   verbose, quiet, prodigal_only, small)
+                                       info_file, tmp_dir, res_annot_dir, verbose, quiet,
+                                       prodigal_only, small)
+    # Pangenome step
+    name_pan = f"{name}_{nbgenomes}"
+    outdir_pan = os.path.join(outdir, "3-pangenome_module")
+    clust_mode = 1
+    spe_dir = ""
+    dbpath = os.path.join(outdir_annotate, "Proteins")
+    panfile = pangenome.main("PanACoTA pangenome", lstinfo, name, dbpath, min_id, outdir_pan,
+                             clust_mode, spe_dir, threads, outfile=None, verbose=verbose,
+                             quiet=quiet)
+    # Coregenome step
+    outdir_corpers = os.path.join(outdir, "4-corepers_module")
+    corepers_file = corepers.main("PanACoTA corepers", panfile, tol, multi, mixed, outdir_corpers,
+                                  verbose=verbose, quiet=quiet)
+    # Align step
+    outdir_align = os.path.join(outdir, "5-align_module")
+    force = False
+    align_file = align.main("PanACoTA align", corepers_file, lstinfo, name_pan, outdir_annotate,
+                            outdir_align, threads, force, verbose, quiet)
+    print(align_file)
+    # Tree step
+    outdir_tree = os.path.join(outdir, "6-tree_module")
+    tree.main("PanACoTA tree", align_file, outdir_tree, soft, model, threads, boot=False,
+              write_boot=False, memory=False, fast=False, verbose=verbose, quiet=quiet)
 def build_parser(parser):
    """
@@ -163,25 +211,29 @@ def build_parser(parser):
                                default=999, help=("Maximum number of contigs allowed to "
                                                   "keep a genome. Default is 999."))
+    # build_parser(prepare)
+    # args_prepare = parser.parse_args(argu)
+    # prepare.check_args(prepare_annote, args_prepare)
+    # OPTIONS = parse(my_parser, sys.argv[1:])
    annote = parser.add_argument_group("'annotate' module arguments")
    annote.add_argument("--prodigal", dest="prodigal_only", action="store_true", default=False,
                        help="Add this option if you only want syntactical annotation, given "
                             "by prodigal, and not functional annotation requiring prokka and "
                             "is slower.")
-    annote.add_argument("-n", dest="name", type=utils_argparse.gen_name,
+    annote.add_argument("-n", dest="name", required=True, type=utils_argparse.gen_name,
                        help=("Choose a name for your annotated genomes. This name should "
                              "contain 4 alphanumeric characters. Generally, they correspond "
                              "to the 2 first letters of genus, and 2 first letters of "
                              "species, e.g. ESCO for Escherichia Coli."))
    pangenome = parser.add_argument_group("'pangenome' module arguments")
-    pangenome.add_argument("-i", dest="min_id", type=perc_id, default=0.8,
+    pangenome.add_argument("-i", dest="min_id", type=utils_argparse.perc_id, default=0.8,
                           help=("Minimum sequence identity to be considered in the same "
                                 "cluster (float between 0 and 1). Default is 0.8."))
    corepers = parser.add_argument_group("'corepers' module arguments")
-    corepers.add_argument("-t", "--tol", dest="tol", default=1, type=utils_argparse.percentage,
+    corepers.add_argument("--tol", dest="tol", type=utils_argparse.percentage, default=1,
                          help=("min %% of genomes having at least 1 member in a family to "
                                "consider the family as persistent (between 0 and 1, "
                                "default is 1 = 100%% of genomes = Core genome)."
@@ -204,7 +256,7 @@ def build_parser(parser):
    tree = parser.add_argument_group("'tree' module arguments")
    softs = ["fasttree", "fastme", "quicktree", "iqtree", "iqtree2"]
-    tree.add_argument("-s", "--soft", dest="soft", choices=softs, default="iqtree",
+    tree.add_argument("--soft", dest="soft", choices=softs, default="iqtree",
                      help=("Choose with which software you want to infer the "
                            "phylogenetic tree. Default is IQtree."))