Skip to content
Snippets Groups Projects
Commit 684b7dec authored by Amandine PERRIN
Browse files

Add option to give assembly level to download

parent ca156e87
No related branches found
No related tags found
No related merge requests found
......@@ -66,14 +66,14 @@ def main_from_parse(arguments):
"""
cmd = "PanACoTA " + ' '.join(arguments.argv)
main(cmd, arguments.NCBI_species, arguments.NCBI_species_taxid, arguments.outdir,
arguments.tmp_dir, arguments.parallel, arguments.no_refseq, arguments.db_dir,
arguments.only_mash,
main(cmd, arguments.NCBI_species, arguments.NCBI_species_taxid, arguments.level,
arguments.outdir, arguments.tmp_dir, arguments.parallel, arguments.no_refseq,
arguments.db_dir, arguments.only_mash,
arguments.from_info, arguments.l90, arguments.nbcont, arguments.cutn, arguments.min_dist,
arguments.max_dist, arguments.verbose, arguments.quiet)
def main(cmd, NCBI_species, NCBI_taxid, outdir, tmp_dir, threads, no_refseq, db_dir,
def main(cmd, NCBI_species, NCBI_taxid, levels, outdir, tmp_dir, threads, no_refseq, db_dir,
only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, verbose, quiet):
"""
Main method, constructing the draft dataset for the given species
......@@ -239,7 +239,7 @@ def main(cmd, NCBI_species, NCBI_taxid, outdir, tmp_dir, threads, no_refseq, db_
else:
# Download all genomes of the given taxID
db_dir, nb_gen = dgf.download_from_refseq(species_linked, NCBI_species, NCBI_taxid,
outdir, threads)
levels, outdir, threads)
logger.info("{} refseq genome(s) downloaded".format(nb_gen))
# Now that genomes are downloaded and uncompressed, check their quality to remove bad ones
......@@ -290,13 +290,21 @@ def build_parser(parser):
general = parser.add_argument_group('General arguments')
general.add_argument("-t", dest="NCBI_species_taxid", default="",
help=("Species taxid to download, corresponding to the "
"'species taxid' provided by the NCBI")
"'species taxid' provided by the NCBI. A comma-separated "
"list of taxid can also be provided.")
)
general.add_argument("-s", dest="NCBI_species", default="",
help=("Species to download, corresponding to the "
"'organism name' provided by the NCBI. Give name between "
"quotes (for example \"escherichia coli\")")
)
general.add_argument("-l", "--assembly_level", dest="levels", default="",
help=("Assembly levels of genomes to download (default: all). "
"Possible levels are: 'all', 'complete', 'chromosome', "
"'scaffold', 'contig'."
"You can also provide a comma-separated list of assembly "
"levels. For ex: 'complete,chromosome'")
)
general.add_argument("-o", dest="outdir",
help=("Give the path to the directory where you want to save the "
"downloaded database. In the given directory, it will create "
......@@ -460,6 +468,15 @@ def check_args(parser, args):
parser.error(f"min_dist ({args.min_dist}) cannot be higher "
f"than max_dist ({args.max_dist})")
# Check that levels, if given, are among possible ones
possible = ["all", "complete", "chromosome", "scaffold", "contig"]
if args.levels:
for level in args.levels.split(","):
if level not in possible:
parser.error("Please choose between available assembly levels: 'all', 'complete', "
"'chromosome', 'scaffold', 'contig'. If several levels, provide a "
f"comma-separated list. Invalid value: '{args.levels}'")
# WARNINGS
# User did not specify a species name
if not args.NCBI_species:
......
......@@ -115,6 +115,36 @@ def test_parser_wrong_cont(capsys):
assert "argument --nbcont: invalid int value: 10.5" in err
def test_parser_wrong_level(capsys):
"""
Test that when the script is called with an assembly level ('-l toto') which is not
among the possible ones, the parser exits (SystemExit) and the error message lists
the available assembly levels and reports the invalid value given.
"""
parser = argparse.ArgumentParser(description="prepare", add_help=False)
prepare.build_parser(parser)
# Parsing must abort: 'toto' is not a known assembly level
with pytest.raises(SystemExit):
prepare.parse(parser, "-t 1234 -o toto -l toto".split())
# The error message is expected on stderr
_, err = capsys.readouterr()
assert ("Please choose between available assembly levels: 'all', 'complete', "
"'chromosome', 'scaffold', 'contig'. If several levels, provide a "
"comma-separated list. Invalid value: 'toto'") in err
def test_parser_wrong_level_notcomma(capsys):
"""
Test that when the script is called with several assembly levels separated by
something else than a comma ('-l complete.scaffold'), the parser exits (SystemExit)
and the error message lists the available assembly levels and reports the whole
given value as invalid.
"""
parser = argparse.ArgumentParser(description="prepare", add_help=False)
prepare.build_parser(parser)
# Parsing must abort: 'complete.scaffold' contains no comma, so it is checked
# as one single (unknown) level
with pytest.raises(SystemExit):
prepare.parse(parser, "-t 1234 -o outdir -l complete.scaffold".split())
# The error message is expected on stderr
_, err = capsys.readouterr()
assert ("Please choose between available assembly levels: 'all', 'complete', "
"'chromosome', 'scaffold', 'contig'. If several levels, provide a "
"comma-separated list. Invalid value: 'complete.scaffold'") in err
def test_max_mash_dist(capsys):
"""
Test that when user is giving a number for max_dist which is not valid:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment