diff --git a/PanACoTA/prepare_module/download_genomes_func.py b/PanACoTA/prepare_module/download_genomes_func.py index bbab0ad92bfa532b4c554f42c36658aa9da04e86..e820316cc89e4c6f6f2003667a5ac284a583d6a0 100644 --- a/PanACoTA/prepare_module/download_genomes_func.py +++ b/PanACoTA/prepare_module/download_genomes_func.py @@ -53,7 +53,7 @@ from PanACoTA import utils logger = logging.getLogger("prepare.dds") -def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threads): +def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, outdir, threads): """ Download refseq genomes of given species @@ -79,7 +79,7 @@ def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threa """ # Name of summary file, with metadata for each strain: - sumfile = os.path.join(outdir, "assembly_summary-{}.txt".format(species_linked)) + sumfile = os.path.join(outdir, f"assembly_summary-{species_linked}.txt") abs_sumfile = os.path.abspath(sumfile) # arguments needed to download all genomes of the given species @@ -99,6 +99,10 @@ def download_from_refseq(species_linked, NCBI_species, NCBI_taxid, outdir, threa message += f" (NCBI_taxid = {NCBI_taxid})." else: message += f" NCBI_taxid = {NCBI_taxid}" + # If assembly level(s) given, add it to arguments, and write to info message + if levels: + keyargs["assembly_levels"] = levels + message += f" (Only those assembly levels: {levels}). " logger.info(f"Metadata for all genomes will be saved in {sumfile}") logger.info(message) diff --git a/test/test_unit/test_prepare/test_download.py b/test/test_unit/test_prepare/test_download.py index 4e7a86758ebc0ef684c87522fffb78e08185e455..58aed49b2fb09902e2f7733f8eb731e70d59c0ea 100755 --- a/test/test_unit/test_prepare/test_download.py +++ b/test/test_unit/test_prepare/test_download.py @@ -204,9 +204,10 @@ def test_download(): NCBI_taxid = "104099" outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq") threads = 1 + levels = "" - db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, - outdir, threads) + db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, + outdir, threads) # Check path to uncompressed files is as expected assert db_dir == os.path.join(outdir, "Database_init") # Check number of genomes downloaded. We cannot know the exact value, as it is updated everyday. But in nov. 2019, there are 4 genomes. So, there must be at least those 4 genomes @@ -241,8 +242,9 @@ def test_download_noSpeName(): NCBI_taxid = "104099" outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_noSpe") threads = 1 + levels = "" - db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, + db_dir, nb_gen = downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, outdir, threads) # Check path to uncompressed files is as expected @@ -277,8 +279,9 @@ def test_download_wrongTaxID(caplog): NCBI_taxid = "10409" outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_wrongTaxID") threads = 1 + levels = "" with pytest.raises(SystemExit): - downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, + downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, outdir, threads) # Check path to uncompressed files does not exist @@ -311,8 +314,9 @@ def test_download_diffSpeTaxID(caplog): NCBI_taxid = "104099" outdir = os.path.join(DATA_TEST_DIR, "test_download_refseq_wrongTaxID") threads = 1 + levels = "" with pytest.raises(SystemExit): - downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, + downg.download_from_refseq(species_linked, NCBI_species, NCBI_taxid, levels, outdir, threads) # Check path to uncompressed files does not exist