Skip to content
Snippets Groups Projects
Commit c8ce71c0 authored by Amandine  PERRIN's avatar Amandine PERRIN
Browse files

Adapt functional tests for new feature in prepare

parent f5469400
No related branches found
No related tags found
No related merge requests found
...@@ -155,9 +155,11 @@ def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, ncbi_strains, l ...@@ -155,9 +155,11 @@ def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, ncbi_strains, l
elif ncbi_strains: elif ncbi_strains:
if os.path.isfile(ncbi_strains): if os.path.isfile(ncbi_strains):
species_linked = os.path.basename(ncbi_strains) species_linked = os.path.basename(ncbi_strains)
species_linked = os.path.splitext(species_linked)[0]
else: else:
species_linked = "_".join(ncbi_strains.split()) species_linked = "_".join(ncbi_strains.split())
species_linked = "-".join(ncbi_strains.split("/")) species_linked = "-".join(species_linked.split("/"))
species_linked = "_and_".join(species_linked.split(","))
# if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA # if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA
else: else:
species_linked = "NA" species_linked = "NA"
......
...@@ -58,6 +58,7 @@ def test_main_from_parse(): ...@@ -58,6 +58,7 @@ def test_main_from_parse():
args.ncbi_species_name = "Acetobacter orleanensis" args.ncbi_species_name = "Acetobacter orleanensis"
args.ncbi_species_taxid = "104099" args.ncbi_species_taxid = "104099"
args.ncbi_taxid = "" args.ncbi_taxid = ""
args.strains = ""
args.ncbi_section = "refseq" args.ncbi_section = "refseq"
args.outdir = GENEPATH args.outdir = GENEPATH
args.tmp_dir = "" args.tmp_dir = ""
...@@ -105,6 +106,7 @@ def test_main_from_parse_longspeciesname(): ...@@ -105,6 +106,7 @@ def test_main_from_parse_longspeciesname():
args.ncbi_species_name = "Salmonella enterica subsp. enterica serovar Paratyphi C" args.ncbi_species_name = "Salmonella enterica subsp. enterica serovar Paratyphi C"
args.ncbi_species_taxid = "" args.ncbi_species_taxid = ""
args.ncbi_taxid = "" args.ncbi_taxid = ""
args.strains = ""
args.ncbi_section = "refseq" args.ncbi_section = "refseq"
args.outdir = GENEPATH args.outdir = GENEPATH
args.tmp_dir = "" args.tmp_dir = ""
...@@ -143,6 +145,104 @@ def test_main_from_parse_longspeciesname(): ...@@ -143,6 +145,104 @@ def test_main_from_parse_longspeciesname():
assert len(fna_files) >= 1 assert len(fna_files) >= 1
def test_main_only_strainname():
    """
    Run prepare.main giving only strain names (no species name, no species
    taxid, no taxid), as a comma-separated string.

    Checks that:
    - the returned path is the LSTINFO file named after the strains
      ("AS001254_and_KPPR1_and_LMG_1583"),
    - the assembly summary file named after the strains exists,
    - the refseq/bacteria download folder exists and contains 3 entries,
    - 3 log files, 3 split files in tmp, and 3 ".fna" files in
      Database_init are found.
    """
    # NCBI selection criteria: only the strain names are filled in
    species_name = ""
    species_taxid = ""
    taxid = ""
    strains = "AS001254,KPPR1,LMG 1583"
    section = "refseq"
    levels = ""

    # Output locations
    outdir = GENEPATH
    tmp_dir = os.path.join(outdir, 'tmp')
    db_dir = ""
    info_file = ""

    # Run options
    threads = 1
    norefseq = False
    only_mash = False
    verbose = 2
    quiet = False

    # Filtering thresholds
    l90 = 100
    nbcont = 999
    cutn = 5
    min_dist = 1e-4
    max_dist = 0.06

    expected_lstinfo = os.path.join(
        outdir, "LSTINFO-AS001254_and_KPPR1_and_LMG_1583-filtered-0.0001_0.06.txt"
    )
    returned_lstinfo = prepare.main(
        "cmd", species_name, species_taxid, taxid, strains, levels, section,
        outdir, tmp_dir, threads, norefseq, db_dir, only_mash, info_file,
        l90, nbcont, cutn, min_dist, max_dist, verbose, quiet,
    )
    assert returned_lstinfo == expected_lstinfo

    # The assembly summary is named after the requested strains
    summary_path = os.path.join(
        GENEPATH, "assembly_summary-AS001254_and_KPPR1_and_LMG_1583.txt"
    )
    assert os.path.isfile(summary_path)

    # The ncbi_genome_download output directory exists and holds 3 entries
    download_dir = os.path.join(GENEPATH, "refseq", "bacteria")
    assert os.path.isdir(download_dir)
    assert len(os.listdir(download_dir)) == 3

    # Log files are present
    found_logs = glob.glob(os.path.join(GENEPATH, "*log*"))
    assert len(found_logs) == 3

    # tmp folder contains one split file per strain
    found_split = glob.glob(os.path.join(tmp_dir, "*.fna_prepare-split5N.fna"))
    assert len(found_split) == 3

    # Database_init contains the 3 ".fna" genomes
    found_fna = glob.glob(os.path.join(GENEPATH, "Database_init", "*.fna"))
    assert len(found_fna) == 3
def test_main_only_strainname_file():
    """
    Run prepare.main giving only strains (no species name, no species taxid,
    no taxid), through the path to the file "test_list-strains.txt" located
    in TEST_DIR.

    Checks that:
    - the returned path is the LSTINFO file named after the strain file
      without its extension ("test_list-strains"),
    - the assembly summary file named the same way exists,
    - the refseq/bacteria download folder exists and contains 3 entries,
    - 3 log files, 3 split files in tmp, and 3 ".fna" files in
      Database_init are found.
    """
    # NCBI selection criteria: only the strain file is filled in
    # (presumably it lists 3 strains, given the counts checked below)
    species_name = ""
    species_taxid = ""
    taxid = ""
    strains = os.path.join(TEST_DIR, "test_list-strains.txt")
    section = "refseq"
    levels = ""

    # Output locations
    outdir = GENEPATH
    tmp_dir = os.path.join(outdir, 'tmp')
    db_dir = ""
    info_file = ""

    # Run options
    threads = 1
    norefseq = False
    only_mash = False
    verbose = 2
    quiet = False

    # Filtering thresholds
    l90 = 100
    nbcont = 999
    cutn = 5
    min_dist = 1e-4
    max_dist = 0.06

    expected_lstinfo = os.path.join(
        outdir, "LSTINFO-test_list-strains-filtered-0.0001_0.06.txt"
    )
    returned_lstinfo = prepare.main(
        "cmd", species_name, species_taxid, taxid, strains, levels, section,
        outdir, tmp_dir, threads, norefseq, db_dir, only_mash, info_file,
        l90, nbcont, cutn, min_dist, max_dist, verbose, quiet,
    )
    assert returned_lstinfo == expected_lstinfo

    # The assembly summary is named after the strain file (without extension)
    summary_path = os.path.join(GENEPATH, "assembly_summary-test_list-strains.txt")
    assert os.path.isfile(summary_path)

    # The ncbi_genome_download output directory exists and holds 3 entries
    download_dir = os.path.join(GENEPATH, "refseq", "bacteria")
    assert os.path.isdir(download_dir)
    assert len(os.listdir(download_dir)) == 3

    # Log files are present
    found_logs = glob.glob(os.path.join(GENEPATH, "*log*"))
    assert len(found_logs) == 3

    # tmp folder contains one split file per strain
    found_split = glob.glob(os.path.join(tmp_dir, "*.fna_prepare-split5N.fna"))
    assert len(found_split) == 3

    # Database_init contains the 3 ".fna" genomes
    found_fna = glob.glob(os.path.join(GENEPATH, "Database_init", "*.fna"))
    assert len(found_fna) == 3
def test_main_not_only_mash_infoexists(): def test_main_not_only_mash_infoexists():
""" """
We run without option only_mash, but still provide a lstinfo file We run without option only_mash, but still provide a lstinfo file
...@@ -152,6 +252,7 @@ def test_main_not_only_mash_infoexists(): ...@@ -152,6 +252,7 @@ def test_main_not_only_mash_infoexists():
NCBI_species_taxid = "104099" NCBI_species_taxid = "104099"
NCBI_taxid = "" NCBI_taxid = ""
NCBI_section = "refseq" NCBI_section = "refseq"
NCBI_strains = ""
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
tmp_dir = os.path.join(outdir, "temporary_directory") tmp_dir = os.path.join(outdir, "temporary_directory")
...@@ -169,7 +270,7 @@ def test_main_not_only_mash_infoexists(): ...@@ -169,7 +270,7 @@ def test_main_not_only_mash_infoexists():
verbose = 2 verbose = 2
quiet = False quiet = False
out_info_file = os.path.join(outdir, "LSTINFO-104099-filtered-0.0001_0.06.txt") out_info_file = os.path.join(outdir, "LSTINFO-104099-filtered-0.0001_0.06.txt")
assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, NCBI_section, outdir, tmp_dir, assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels, NCBI_section, outdir, tmp_dir,
threads, norefseq, db_dir, only_mash, info_file, l90, nbcont, threads, norefseq, db_dir, only_mash, info_file, l90, nbcont,
cutn, min_dist, max_dist, verbose, quiet) == out_info_file cutn, min_dist, max_dist, verbose, quiet) == out_info_file
...@@ -204,10 +305,11 @@ def test_main_wrong_taxid(capsys): ...@@ -204,10 +305,11 @@ def test_main_wrong_taxid(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_taxid = "123" NCBI_taxid = "123"
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_strains = ""
NCBI_section = "genbank" NCBI_section = "genbank"
levels = "" levels = ""
outdir = "" outdir = GENEPATH
tmp_dir = os.path.join("123", "temporary_directory") tmp_dir = os.path.join(GENEPATH, "123", "temporary_directory")
threads = 1 threads = 1
norefseq = False norefseq = False
info_file = "" info_file = ""
...@@ -220,31 +322,28 @@ def test_main_wrong_taxid(capsys): ...@@ -220,31 +322,28 @@ def test_main_wrong_taxid(capsys):
max_dist = 0.06 max_dist = 0.06
verbose = 2 verbose = 2
quiet = False quiet = False
res_outdir = "123"
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, NCBI_section, prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels, NCBI_section,
outdir, tmp_dir, threads, norefseq, outdir, tmp_dir, threads, norefseq,
db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist,
verbose, quiet) verbose, quiet)
_, err = capsys.readouterr() _, err = capsys.readouterr()
assert ("Could not download genomes. Check that you gave valid NCBI taxid and/or " assert ("No strain correspond to your request. If you are sure there should have "
"NCBI species name. If you gave both, check that given taxID and name really " "some, check that you gave valid NCBI taxid and/or "
"correspond to the same species.") in err "NCBI species name and/or NCBI strain name. If you gave several, check that "
"given taxIDs and names are compatible.") in err
# Check output files # Check output files
summary = os.path.join(res_outdir, "assembly_summary-123.txt") summary = os.path.join(outdir, "assembly_summary-123.txt")
assert not os.path.isfile(summary) assert not os.path.isfile(summary)
ngd_outdir = os.path.join(res_outdir, "genbank", "bacteria") ngd_outdir = os.path.join(outdir, "genbank", "bacteria")
assert not os.path.isdir(ngd_outdir) assert not os.path.isdir(ngd_outdir)
# # Check logfiles are here # # Check logfiles are here
log_files = glob.glob(os.path.join(res_outdir, "*log*")) log_files = glob.glob(os.path.join(outdir, "*log*"))
assert len(log_files) == 3 assert len(log_files) == 3
# Check tmp files folder created, but empty as nothing is downloaded # Check tmp files folder created, but empty as nothing is downloaded
assert len(os.listdir(tmp_dir)) == 0 assert len(os.listdir(tmp_dir)) == 0
# Check Database_init folder was not created, as nothing is downloaded # Check Database_init folder was not created, as nothing is downloaded
assert not os.path.isdir(os.path.join(res_outdir, "Database_init")) assert not os.path.isdir(os.path.join(outdir, "Database_init"))
# Remove output directory
shutil.rmtree(res_outdir, ignore_errors=True)
def test_main_norefseq_wrongdbpath(capsys): def test_main_norefseq_wrongdbpath(capsys):
...@@ -255,6 +354,7 @@ def test_main_norefseq_wrongdbpath(capsys): ...@@ -255,6 +354,7 @@ def test_main_norefseq_wrongdbpath(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
...@@ -272,7 +372,7 @@ def test_main_norefseq_wrongdbpath(capsys): ...@@ -272,7 +372,7 @@ def test_main_norefseq_wrongdbpath(capsys):
quiet = False quiet = False
info_file = "" info_file = ""
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, NCBI_section, prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels, NCBI_section,
outdir, tmp_dir, threads, norefseq, outdir, tmp_dir, threads, norefseq,
db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist,
verbose, quiet) verbose, quiet)
...@@ -302,6 +402,7 @@ def test_main_norefseq_nodefault_dbdir_nor_refseq(capsys): ...@@ -302,6 +402,7 @@ def test_main_norefseq_nodefault_dbdir_nor_refseq(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "genbank" NCBI_section = "genbank"
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
...@@ -319,7 +420,7 @@ def test_main_norefseq_nodefault_dbdir_nor_refseq(capsys): ...@@ -319,7 +420,7 @@ def test_main_norefseq_nodefault_dbdir_nor_refseq(capsys):
quiet = False quiet = False
info_file = "" info_file = ""
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, norefseq, NCBI_section, outdir, tmp_dir, threads, norefseq,
db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist,
verbose, quiet) verbose, quiet)
...@@ -359,6 +460,7 @@ def test_main_norefseq_nodefault_dbdir_but_refseq(capsys): ...@@ -359,6 +460,7 @@ def test_main_norefseq_nodefault_dbdir_but_refseq(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "123" NCBI_species_taxid = "123"
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "genbank" NCBI_section = "genbank"
levels = "" levels = ""
# Copy refseq/bacteria and content into outdirectory # Copy refseq/bacteria and content into outdirectory
...@@ -380,7 +482,7 @@ def test_main_norefseq_nodefault_dbdir_but_refseq(capsys): ...@@ -380,7 +482,7 @@ def test_main_norefseq_nodefault_dbdir_but_refseq(capsys):
quiet = False quiet = False
info_file = "" info_file = ""
out_info_file = os.path.join(outdir, f"LSTINFO-123-filtered-0.0001_0.06.txt") out_info_file = os.path.join(outdir, f"LSTINFO-123-filtered-0.0001_0.06.txt")
assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, NCBI_section, outdir, tmp_dir, threads,
norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist,
max_dist, verbose, quiet) == out_info_file max_dist, verbose, quiet) == out_info_file
...@@ -419,6 +521,7 @@ def test_main_norefseq_defaultdbdir(capsys): ...@@ -419,6 +521,7 @@ def test_main_norefseq_defaultdbdir(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
# Copy refseq/bacteria and content into outdirectory # Copy refseq/bacteria and content into outdirectory
...@@ -440,7 +543,7 @@ def test_main_norefseq_defaultdbdir(capsys): ...@@ -440,7 +543,7 @@ def test_main_norefseq_defaultdbdir(capsys):
quiet = False quiet = False
info_file = "" info_file = ""
out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt") out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt")
assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, NCBI_section, outdir, tmp_dir, threads,
norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist,
max_dist, verbose, quiet) == out_info_file max_dist, verbose, quiet) == out_info_file
...@@ -472,6 +575,7 @@ def test_main_norefseq_givendbdir(capsys): ...@@ -472,6 +575,7 @@ def test_main_norefseq_givendbdir(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
# Copy refseq/bacteria and content into outdirectory # Copy refseq/bacteria and content into outdirectory
...@@ -493,7 +597,7 @@ def test_main_norefseq_givendbdir(capsys): ...@@ -493,7 +597,7 @@ def test_main_norefseq_givendbdir(capsys):
quiet = False quiet = False
info_file = "" info_file = ""
out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt") out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt")
assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, NCBI_section, outdir, tmp_dir, threads,
norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist,
max_dist, verbose, quiet) == out_info_file max_dist, verbose, quiet) == out_info_file
...@@ -521,6 +625,7 @@ def test_only_mash(capsys): ...@@ -521,6 +625,7 @@ def test_only_mash(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
...@@ -538,7 +643,7 @@ def test_only_mash(capsys): ...@@ -538,7 +643,7 @@ def test_only_mash(capsys):
verbose = 1 verbose = 1
quiet = False quiet = False
out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt") out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt")
assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, assert prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, NCBI_section, outdir, tmp_dir, threads,
norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, norefseq, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist,
max_dist, verbose, quiet) == out_info_file max_dist, verbose, quiet) == out_info_file
...@@ -569,6 +674,7 @@ def test_only_mash_empty_lstinfo(capsys): ...@@ -569,6 +674,7 @@ def test_only_mash_empty_lstinfo(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
...@@ -588,7 +694,7 @@ def test_only_mash_empty_lstinfo(capsys): ...@@ -588,7 +694,7 @@ def test_only_mash_empty_lstinfo(capsys):
verbose = 1 verbose = 1
quiet = False quiet = False
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, norefseq, NCBI_section, outdir, tmp_dir, threads, norefseq,
db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist,
verbose, quiet) verbose, quiet)
...@@ -615,6 +721,7 @@ def test_only_mash_no_lstinfo(capsys): ...@@ -615,6 +721,7 @@ def test_only_mash_no_lstinfo(capsys):
NCBI_species_name = "" NCBI_species_name = ""
NCBI_species_taxid = "" NCBI_species_taxid = ""
NCBI_taxid = "" NCBI_taxid = ""
NCBI_strains = ""
NCBI_section = "refseq" NCBI_section = "refseq"
levels = "" levels = ""
outdir = GENEPATH outdir = GENEPATH
...@@ -633,7 +740,7 @@ def test_only_mash_no_lstinfo(capsys): ...@@ -633,7 +740,7 @@ def test_only_mash_no_lstinfo(capsys):
verbose = 1 verbose = 1
quiet = False quiet = False
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, levels, prepare.main("cmd", NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
NCBI_section, outdir, tmp_dir, threads, norefseq, NCBI_section, outdir, tmp_dir, threads, norefseq,
db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, db_dir, only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist,
verbose, quiet) verbose, quiet)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment