Skip to content
Snippets Groups Projects
Commit f6510f0d authored by Amandine PERRIN
Browse files

Add possibility to choose assembly level while downloading genomes from NCBI

parent 0b90c4b7
No related branches found
No related tags found
No related merge requests found
Pipeline #39521 failed
......@@ -53,7 +53,7 @@ from PanACoTA import utils
logger = logging.getLogger("prepare.dds")
def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threads):
def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, outdir, threads):
"""
Download refseq genomes of given species
......@@ -79,7 +79,7 @@ def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threa
"""
# Name of summary file, with metadata for each strain:
sumfile = os.path.join(outdir, "assembly_summary-{}.txt".format(species_linked))
sumfile = os.path.join(outdir, f"assembly_summary-{species_linked}.txt")
abs_sumfile = os.path.abspath(sumfile)
# arguments needed to download all genomes of the given species
......@@ -99,6 +99,10 @@ def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threa
message += f" (NCBI_taxid = {NCBI_taxid})."
else:
message += f" NCBI_taxid = {NCBI_taxid}"
# If assembly level(s) are given, add them to the arguments and mention them in the info message
if levels:
keyargs["assembly_levels"] = levels
message += f" (Only those assembly levels: {levels}). "
logger.info(f"Metadata for all genomes will be saved in {sumfile}")
logger.info(message)
......
......@@ -204,9 +204,10 @@ def test_download():
NCBI_taxid = "104099"
outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq")
threads = 1
levels = ""
db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid,
outdir, threads)
db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels,
outdir, threads)
# Check path to uncompressed files is as expected
assert db_dir == os.path.join(outdir, "Database_init")
# Check the number of genomes downloaded. We cannot know the exact value, as it is updated every day. But in Nov. 2019 there were 4 genomes, so there must be at least those 4 genomes.
......@@ -241,8 +242,9 @@ def test_download_noSpeName():
NCBI_taxid = "104099"
outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_noSpe")
threads = 1
levels = ""
db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid,
db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels,
outdir, threads)
# Check path to uncompressed files is as expected
......@@ -277,8 +279,9 @@ def test_download_wrongTaxID(caplog):
NCBI_taxid = "10409"
outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_wrongTaxID")
threads = 1
levels = ""
with pytest.raises(SystemExit):
downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid,
downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels,
outdir, threads)
# Check path to uncompressed files does not exist
......@@ -311,8 +314,9 @@ def test_download_diffSpeTaxID(caplog):
NCBI_taxid = "104099"
outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_wrongTaxID")
threads = 1
levels = ""
with pytest.raises(SystemExit):
downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid,
downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels,
outdir, threads)
# Check path to uncompressed files does not exist
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment