diff --git a/Examples/input_files/configfile.ini b/Examples/input_files/configfile.ini
index c481f05fc5f3e1594211230557f2d5fd41f151f6..7c62775618116231e45be824fa19c152be3010bf 100644
--- a/Examples/input_files/configfile.ini
+++ b/Examples/input_files/configfile.ini
@@ -25,6 +25,7 @@
#qc_only = False
#db_path =
#prodigal_only = False
+#small = False
#name =
#date =
#l90 = 100
diff --git a/PanACoTA/annotate_module/annotation_functions.py b/PanACoTA/annotate_module/annotation_functions.py
index fa62ab7ebc0904add5d76da4c090b7c2d60bb0d2..81846f0f56808c93c45de1dc27c94dbd651abb22 100755
--- a/PanACoTA/annotate_module/annotation_functions.py
+++ b/PanACoTA/annotate_module/annotation_functions.py
@@ -58,7 +58,7 @@ logger = logging.getLogger('annotate.run_annotation_all')
def run_annotation_all(genomes, threads, force, annot_folder, fgn, prodigal_only=False,
- quiet=False):
+ small=False, quiet=False):
"""
For each genome in genomes, run prokka (or only prodigal) to annotate the genome.
@@ -80,6 +80,8 @@ def run_annotation_all(genomes, threads, force, annot_folder, fgn, prodigal_only
name (key in genomes dict) of the fist genome, which will be used for prodigal training
prodigal_only : bool
True if only prodigal must run, False if prokka must run
+ small : bool
+ True -> use -p meta option with prodigal. Do not use training
quiet : bool
True if nothing must be written to stderr/stdout, False otherwise
@@ -126,7 +128,10 @@ def run_annotation_all(genomes, threads, force, annot_folder, fgn, prodigal_only
# If problem, gpath_train will be empty, but this will be checked while
# trying to run prodigal, because we also need to check that genomes are not simply
# already annotated
- gpath_train = prodigal_train(gtrain, annot_folder)
+ if not small:
+ gpath_train = prodigal_train(gtrain, annot_folder)
+ else:
+ gpath_train = "small option"
elif threads <= 3:
# less than 3 threads: run prokka 1 by 1 with all threads
cores_annot = threads
@@ -381,10 +386,13 @@ def run_prodigal(arguments):
logger.warning("Prodigal results folder already exists, but is removed because "
"--force option was used.")
- # If training file does not exist and prodigal result directory neither, return False
+ # Training file can be "small option", meaning that we did not use the training mode.
+ # If not "small option", we used the training mode. If training file does not exist
+ # and prodigal result directory neither, return False
# We cannot annotate using nothing.
# Happens if there was a problem while training
- if not os.path.isfile(gpath_train) and not os.path.isdir(prodigal_dir):
+ if (gpath_train != "small option" and not os.path.isfile(gpath_train)
+ and not os.path.isdir(prodigal_dir)):
return False
logger.log(utils.detail_lvl(), f"Start annotating {name} (from {gpath} sequence) "
@@ -404,13 +412,13 @@ def run_prodigal(arguments):
"remove this result folder, or use '-F' or '--force' "
"option.".format(prodigal_dir))
- logger.log(utils.detail_lvl(), "End annotating {} (from {})".format(name, gpath))
+ logger.log(utils.detail_lvl(), f"End annotating {name} (from {gpath})")
# If missing files, or other problems in result dir, error message,
# ask user to force or remove this folder.
else:
logger.warning("Problems in the files contained in your already existing output dir "
- "({}). Please check it, or remove it to "
- "re-annotate.".format(prodigal_dir))
+ f"({prodigal_dir}). Please check it, or remove it to "
+ "re-annotate.")
# If everything was ok -> everything is ready for next step -> return True
# If something is wrong -> cannot use those results, genome won't be annotated
# -> return False
@@ -430,8 +438,12 @@ def run_prodigal(arguments):
error = (f"Error while trying to run prodigal. See {prodigal_logfile_err}.")
prodigalf = open(prodigal_logfile, "w")
prodigalferr = open(prodigal_logfile_err, "w")
+ if gpath_train == "small option":
+ training = "-p meta"
+ else:
+ training = f"-t {gpath_train}"
cmd = (f"prodigal -i {gpath} -d {basic_outname + '.ffn'} -a {basic_outname + '.faa'} "
- f"-f gff -o {basic_outname + '.gff'} -t {gpath_train} -q")
+ f"-f gff -o {basic_outname + '.gff'} {training} -q")
logger.log(utils.detail_lvl(), "Prodigal command: " + cmd)
ret = utils.run_cmd(cmd, error, eof=False, stderr=prodigalferr, stdout=prodigalf,
diff --git a/PanACoTA/subcommands/annotate.py b/PanACoTA/subcommands/annotate.py
index 7e0589adfba94e622ae6370892dc382a93755d61..4b9776ad3bb7adfce6e5e903ce9a305787105d38 100755
--- a/PanACoTA/subcommands/annotate.py
+++ b/PanACoTA/subcommands/annotate.py
@@ -151,12 +151,13 @@ def main_from_parse(arguments):
arguments.name,
arguments.date, arguments.l90, arguments.nbcont, arguments.cutn, arguments.threads,
arguments.force, arguments.qc_only, arguments.from_info, arguments.tmpdir,
- arguments.annotdir, arguments.verbose, arguments.quiet, arguments.prodigal_only)
+ arguments.annotdir, arguments.verbose, arguments.quiet, arguments.prodigal_only,
+ arguments.small)
def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn=5,
threads=1, force=False, qc_only=False, from_info=None, tmp_dir=None, res_annot_dir=None,
- verbose=0, quiet=False, prodigal_only=False):
+ verbose=0, quiet=False, prodigal_only=False, small=False):
"""
Main method, doing all steps:
@@ -224,6 +225,8 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
True if nothing must be sent to stdout/stderr, False otherwise
prodigal_only : bool
True -> run only prodigal. False -> run prokka
+ small : bool
+ True -> use -p meta option with prodigal
Returns
-------
@@ -381,7 +384,7 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
# STEP 4. Annotate all kept genomes
results = pfunc.run_annotation_all(kept_genomes, threads, force, res_annot_dir, first_gname,
- prodigal_only, quiet=quiet)
+ prodigal_only, small=small, quiet=quiet)
# Information on genomes to format
# results_ok = {genome: [gembase_name, path_to_origfile, path_split_gembase,
# gsize, nbcont, L90]}
@@ -470,6 +473,10 @@ def build_parser(parser):
help="Add this option if you only want syntactical annotation, given "
"by prodigal, and not functional annotation requiring prokka and "
"is slower.")
+ optional.add_argument("--small", dest="small", action="store_true", default=False,
+ help="If you use Prodigal to annotate genomes, if you sequences are "
+ "too small (less than 20000 characters), it cannot annotate them "
+ "with the default options. Add this option to use 'meta' procedure.")
optional.add_argument("--l90", dest="l90", type=int, default=100,
help="Maximum value of L90 allowed to keep a genome. Default is 100.")
optional.add_argument("--nbcont", dest="nbcont", type=utils_argparse.cont_num, default=999,
@@ -582,6 +589,10 @@ def check_args(parser, args):
return split + trust
# ERRORS
+ # Cannot be verbose and quiet at the same time
+ if args.verbose > 0 and args.quiet:
+ parser.error("Choose between a verbose output (-v) or a quiet output (-q)."
+ " You cannot have both.")
# User wants to run all annotation step: needs a genome dataset name
if not args.qc_only and not args.name:
parser.error("You must specify your genomes dataset name in 4 characters with "
@@ -591,10 +602,10 @@ def check_args(parser, args):
# If QC only, we do not need name -> name is NONE
if args.qc_only and not args.name:
args.name = "NONE"
- # Cannot be verbose and quiet at the same time
- if args.verbose > 0 and args.quiet:
- parser.error("Choose between a verbose output (-v) or a quiet output (-q)."
- " You cannot have both.")
+ # option --small used only with prodigal
+ if not args.prodigal_only and args.small:
+ parser.error("You cannot use --small option with prokka. Either use prodigal, "
+ "or remove this option.")
# If user specifies a cutN value (different than default one which is 5), and give
# an info file, it is not compatible: info file will use sequences as is, and won't cut them
if args.cutn != 5 and args.from_info: