diff --git a/PanACoTA/prepare_module/download_genomes_func.py b/PanACoTA/prepare_module/download_genomes_func.py index 2b11ac8ef769c08d4aef6b6afb406ca2bb44c456..ef091f19c21b4bdd7ed26433f3de0a16a4806cd4 100644 --- a/PanACoTA/prepare_module/download_genomes_func.py +++ b/PanACoTA/prepare_module/download_genomes_func.py @@ -13,7 +13,6 @@ import shutil import sys import glob import urllib.request -import progressbar import ncbi_genome_download as ngd from PanACoTA import utils diff --git a/PanACoTA/prepare_module/filter_genomes.py b/PanACoTA/prepare_module/filter_genomes.py index 433717e665b6861bdac3fd478d91c8c8adfd26fa..2356f40f36095f142df7a3b54425552221917c08 100755 --- a/PanACoTA/prepare_module/filter_genomes.py +++ b/PanACoTA/prepare_module/filter_genomes.py @@ -101,7 +101,8 @@ def sort_genomes_minhash(genomes, max_l90, max_cont): return sorted_genomes -def iterative_mash(sorted_genomes, genomes, outdir, species_linked, min_dist, max_dist, threads): +def iterative_mash(sorted_genomes, genomes, outdir, species_linked, min_dist, max_dist, + threads, quiet): """ Run mash all vs all, to get all pairwise distances. Then, take the first genome of the list, and remove those for which the distance to it @@ -124,6 +125,8 @@ def iterative_mash(sorted_genomes, genomes, outdir, species_linked, min_dist, ma max limit of distance between 2 genomes to keep them threads : max number of threads to use + quiet : bool + True if nothing must be sent to stdout/stderr, False otherwise Returns ------- @@ -181,19 +184,22 @@ def iterative_mash(sorted_genomes, genomes, outdir, species_linked, min_dist, ma # Iteratively discard genomes too close or too far logger.info("Starting iterative discarding steps") - widgets = ['Genomes compared: ', - progressbar.Bar(marker='█', left='', right='', fill=' '), ' ', - progressbar.Counter(), "/{}".format(nbgen), ' ', - progressbar.Timer(), ' - ' - ] - bar = progressbar.ProgressBar(widgets=widgets, max_value=len(to_try), term_width=100).start() - done = 0 + if not quiet: + widgets = ['Genomes compared: ', + progressbar.Bar(marker='█', left='', right='', fill=' '), ' ', + progressbar.Counter(), "/{}".format(nbgen), ' ', + progressbar.Timer(), ' - ' + ] + bar = progressbar.ProgressBar(widgets=widgets, max_value=len(to_try), term_width=100).start() + done = 0 while len(to_try) > 1: mash_step(to_try, corresp_file, mat_sp, genomes_removed, min_dist, max_dist) - done = nbgen - len(to_try) - bar.update(done) - bar.finish() + if not quiet: + done = nbgen - len(to_try) + bar.update(done) + if not quiet: + bar.finish() logger.info("Final number of genomes in dataset: {}".format(nbgen - len(genomes_removed))) return genomes_removed diff --git a/PanACoTA/subcommands/prepare.py b/PanACoTA/subcommands/prepare.py index 5deaa300877a73ef31c9cc20df15f6009531c32f..16ae3ae0eeb79f2d2e689c11db283a4863245cc4 100644 --- a/PanACoTA/subcommands/prepare.py +++ b/PanACoTA/subcommands/prepare.py @@ -204,7 +204,7 @@ def main(cmd, NCBI_species, NCBI_taxid, outdir, tmp_dir, threads, no_refseq, onl # sorted_genome : [genome_file] ordered by L90/nbcont (keys of genomes) sorted_genomes = fg.sort_genomes_minhash(genomes, l90, nbcont) removed = fg.iterative_mash(sorted_genomes, genomes, outdir, species_linked, - min_dist, max_dist, threads) + min_dist, max_dist, threads, quiet) # Write list of genomes kept, and list of genomes removed fg.write_outputfiles(genomes, sorted_genomes, removed, outdir, species_linked, min_dist) logger.info("End") @@ -355,6 +355,11 @@ def check_args(parser, args): parser.error("If you want to run only Mash filtering steps, please give the " "info file with the required information (see '--info' option)") + # Cannot be verbose and quiet at the same time + if args.verbose > 0 and args.quiet: + parser.error("Choose between a verbose output (-v) or a quiet output (-q)." + " You cannot have both.") + # WARNINGS # User did not specify a species name if not args.NCBI_species: