diff --git a/test/test_functional/test_align.py b/test/test_functional/test_align.py index cd9eb9bbe40bc7b8b425b474e623b86aa8bf5d5e..7af9683820b8926d08c46a0fb381a4341ff961a2 100755 --- a/test/test_functional/test_align.py +++ b/test/test_functional/test_align.py @@ -43,172 +43,172 @@ def setup_teardown_module(): print("teardown") -# def test_main(): -# """ -# Test that when giving a database, a persistent genome and a list of genomes, it extracts -# expected proteins by family, aligns each family, back-translates them, concatenates all -# families into one file and groups them by genome. -# """ -# corepers = os.path.join(TESTPATH, "test_pers0.99FX.lst") -# list_genomes = os.path.join("test", "data", "pangenome", "test_files", "list_to_pan.txt") -# dname = "TEST4" -# dbpath = os.path.join("test", "data", "pangenome", "test_files", "example_db") -# outdir = GENEPATH -# threads = 1 -# force = False -# cmd = "cmd" -# al.main(cmd, corepers, list_genomes, dname, dbpath, outdir, threads, force) -# # Check creation of the 3 subdirectories -# aldir = os.path.join(outdir, "Align-" + dname) -# listdir = os.path.join(outdir, "List-" + dname) -# treedir = os.path.join(outdir, "Phylo-" + dname) -# assert os.path.isdir(aldir) -# assert os.path.isdir(listdir) -# assert os.path.isdir(treedir) -# # Check content of listdir -# genomes = ["GEN2.1017.00001", "GEN4.1111.00001", "GENO.1017.00001", "GENO.1216.00002"] -# for gen in genomes: -# assert os.path.isfile(os.path.join(listdir, f"{dname}-getEntry_gen_{gen}.txt")) -# assert os.path.isfile(os.path.join(listdir, f"{dname}-getEntry_prt_{gen}.txt")) -# # Check content of aldir -# fams = [1, 4, 6, 8, 10, 11, 13, 14] -# for fam in fams: -# assert os.path.isfile(os.path.join(aldir, f'{dname}-current.{fam}.gen')) -# assert os.path.isfile(os.path.join(aldir, f'{dname}-current.{fam}.prt')) -# assert os.path.isfile(os.path.join(aldir, f'{dname}-current.{fam}.miss.lst')) -# assert os.path.isfile(os.path.join(aldir, 
def test_main():
    """
    Functional test of the align module, starting from an output directory where nothing
    has been pre-created by this test.

    Calls ``al.main`` with a persistent-genome file, a list of genomes and a database, then
    checks that the Align/List/Phylo sub-directories exist, that the per-genome and
    per-family files were written, that the concatenated and grouped alignments match the
    expected files, and that the log files exist (empty ``.err``, expected messages in
    ``.details``).
    """
    # Inputs given to the align module
    dname = "TEST4"
    pan_files = os.path.join("test", "data", "pangenome", "test_files")
    corepers = os.path.join(TESTPATH, "test_pers0.99FX.lst")
    list_genomes = os.path.join(pan_files, "list_to_pan.txt")
    dbpath = os.path.join(pan_files, "example_db")
    outdir = GENEPATH
    al.main("cmd", corepers, list_genomes, dname, dbpath, outdir, 1, False)

    # The 3 sub-directories must have been created
    aldir, listdir, treedir = (os.path.join(outdir, prefix + dname)
                               for prefix in ("Align-", "List-", "Phylo-"))
    for created in (aldir, listdir, treedir):
        assert os.path.isdir(created)

    # listdir: one 'gen' and one 'prt' getEntry file per genome
    genomes = ["GEN2.1017.00001", "GEN4.1111.00001", "GENO.1017.00001", "GENO.1216.00002"]
    for genome in genomes:
        for kind in ("gen", "prt"):
            entry_file = os.path.join(listdir, f"{dname}-getEntry_{kind}_{genome}.txt")
            assert os.path.isfile(entry_file)

    # aldir: extraction, missing-list, alignment and back-translation files per family
    fams = [1, 4, 6, 8, 10, 11, 13, 14]
    per_family = ("current.{}.gen", "current.{}.prt", "current.{}.miss.lst",
                  "mafft-align.{}.aln", "mafft-prt2nuc.{}.aln")
    for family in fams:
        for template in per_family:
            fam_file = os.path.join(aldir, f"{dname}-{template.format(family)}")
            assert os.path.isfile(fam_file)

    # aldir: concatenation of all families
    assert tutil.compare_order_content(
        os.path.join(aldir, dname + "-complete.cat.aln"),
        os.path.join(EXPPATH, "exp_pers4genome-complete.cat.aln"))

    # treedir: alignments grouped by genome
    assert tutil.compare_order_content(
        os.path.join(treedir, dname + ".grp.aln"),
        os.path.join(EXPPATH, "exp_pers4genomes.grp.aln"))

    # Log files must all exist, and the error log must be empty
    base_log = os.path.join(outdir, "PanACoTA-align_" + dname + ".log")
    for suffix in ("", ".details", ".err"):
        assert os.path.isfile(base_log + suffix)
    with open(base_log + ".err", "r") as err_log:
        assert err_log.read() == ""

    # Detailed log: lines are joined with a space once, then searched for each message
    with open(base_log + ".details", "r") as details:
        joined_log = " ".join(details.readlines())
    assert ("Reading PersGenome and constructing lists of missing genomes in "
            "each family") in joined_log
    assert "Extracting proteins and genes from all genomes" in joined_log
    for genome in genomes:
        assert f"Extracting proteins and genes from {genome}" in joined_log
    assert ("Starting alignment of all families: protein alignment, back-translation to "
            "nucleotides, and add missing genomes in the family") in joined_log
    for family in fams:
        for step in ("Checking extractions for family {}",
                     "Aligning family {}",
                     "Back-translating family {}"):
            assert step.format(family) in joined_log
    for final_msg in ("Concatenating all alignment files",
                      "Grouping alignments per genome",
                      "END"):
        assert final_msg in joined_log
"Phylo-" + dname) -# os.makedirs(aldir) -# os.makedirs(listdir) -# os.makedirs(treedir) -# # Create content of listdir -# ex_listdir = os.path.join(EXPPATH, "exp_listdir-pers") -# genomes = ["GEN2.1017.00001", "GEN4.1111.00001", "GENO.1017.00001", "GENO.1216.00002"] -# for gen in genomes: -# outgen = os.path.join(listdir, f"{dname}-getEntry_gen_{gen}.txt") -# refgen = os.path.join(ex_listdir, f"getEntry_gen_{gen}") -# shutil.copyfile(refgen, outgen) -# outprt = os.path.join(listdir, f"{dname}-getEntry_prt_{gen}.txt") -# refprt = os.path.join(ex_listdir, f"getEntry_prt_{gen}") -# shutil.copyfile(refprt, outprt) -# # Create content of aldir -# ex_aldir = os.path.join(EXPPATH, "exp_aldir-pers") -# fams = [1, 4, 6, 8, 10, 11, 13, 14] -# for fam in fams: -# outgen = os.path.join(aldir, f'{dname}-current.{fam}.gen') -# refgen = os.path.join(ex_aldir, f"current.{fam}.gen") -# shutil.copyfile(refgen, outgen) -# outprt = os.path.join(aldir, f'{dname}-current.{fam}.prt') -# refprt = os.path.join(ex_aldir, f"current.{fam}.prt") -# shutil.copyfile(refprt, outprt) -# outmiss = os.path.join(aldir, f'{dname}-current.{fam}.miss.lst') -# refmiss = os.path.join(ex_aldir, f"current.{fam}.miss.lst") -# shutil.copyfile(refmiss, outmiss) -# outaln = os.path.join(aldir, f'{dname}-mafft-align.{fam}.aln') -# refaln = os.path.join(ex_aldir, f"mafft-align.{fam}.aln") -# shutil.copyfile(refaln, outaln) -# outbtr = os.path.join(aldir, f'{dname}-mafft-prt2nuc.{fam}.aln') -# refbtr = os.path.join(ex_aldir, f"mafft-prt2nuc.{fam}.aln") -# shutil.copyfile(refbtr, outbtr) -# outcat = os.path.join(aldir, dname + "-complete.cat.aln") -# refcat = os.path.join(EXPPATH, "exp_pers4genome-complete.cat.aln") -# shutil.copyfile(refcat, outcat) -# # Create content of treedir -# outgrp = os.path.join(treedir, dname + ".grp.aln") -# refgrp = os.path.join(EXPPATH, "exp_pers4genomes.grp.aln") -# shutil.copyfile(refgrp, outgrp) -# # Run align module -# al.main(cmd, corepers, list_genomes, dname, dbpath, outdir, 
threads, force) -# # Check logs -# logfile = os.path.join(outdir, "PanACoTA-align_TEST4exists.log.details") -# with open(logfile, "r") as lc: -# log_content = lc.readlines() -# assert ("Reading PersGenome and constructing lists of missing genomes in " -# "each family") in " ".join(log_content) -# for gen in genomes: -# assert (f"For genome {gen}, test/data/align/generated_by_func_tests/test_main_exist_ok/" -# f"List-TEST4exists/TEST4exists-getEntry_prt_{gen}.txt and test/data/align/" -# "generated_by_func_tests/test_main_exist_ok/List-TEST4exists/" -# f"TEST4exists-getEntry_gen_{gen}.txt already exist. The program " -# "will use them to extract proteins and genes. If you prefer to rewrite " -# "them, use option -F ") in " ".join(log_content) -# assert ("Starting alignment of all families: protein alignment, back-translation to " -# "nucleotides, and add missing genomes in the family") in " ".join(log_content) -# for fam in fams: -# assert "Checking extractions for family {}".format(fam) in " ".join(log_content) -# assert ("Alignment already done for family {}. The program will use it for next " -# "steps").format(fam) in " ".join(log_content) -# assert ("All extraction files already existing (see detailed log for more " -# "information)") in " ".join(log_content) -# assert ("All prt and gene files for all families already exist. The program will use them " -# "for the next step. If you want to re-extract a given family, remove its prt and " -# "gen extraction files. If you want to re-extract all families, use option -F " -# "(or --force).") in " ".join(log_content) -# assert ("Alignments already concatenated in " -# "test/data/align/generated_by_func_tests/test_main_exist_ok/Align-TEST4exists/" -# "TEST4exists-complete.cat.aln. Program will " -# "use it for next steps. 
def test_main_exist_ok():
    """
    Functional test of the align module when every output file is already there.

    The List/Align/Phylo directories are filled beforehand with copies of the expected
    files, then ``al.main`` is called with ``force = False``. The detailed log must then
    contain the messages saying that existing files are re-used, and the final "END".
    """
    # Inputs given to the align module
    dname = "TEST4exists"
    pan_files = os.path.join("test", "data", "pangenome", "test_files")
    corepers = os.path.join(TESTPATH, "test_pers0.99FX.lst")
    list_genomes = os.path.join(pan_files, "list_to_pan.txt")
    dbpath = os.path.join(pan_files, "example_db")
    outdir = os.path.join(GENEPATH, "test_main_exist_ok")

    # Create the 3 output directories
    aldir = os.path.join(outdir, "Align-" + dname)
    listdir = os.path.join(outdir, "List-" + dname)
    treedir = os.path.join(outdir, "Phylo-" + dname)
    for new_dir in (aldir, listdir, treedir):
        os.makedirs(new_dir)

    # Fill listdir: getEntry files ('gen' then 'prt') for each genome
    ex_listdir = os.path.join(EXPPATH, "exp_listdir-pers")
    genomes = ["GEN2.1017.00001", "GEN4.1111.00001", "GENO.1017.00001", "GENO.1216.00002"]
    for genome in genomes:
        for kind in ("gen", "prt"):
            shutil.copyfile(
                os.path.join(ex_listdir, f"getEntry_{kind}_{genome}"),
                os.path.join(listdir, f"{dname}-getEntry_{kind}_{genome}.txt"))

    # Fill aldir: extraction, missing-list, alignment and back-translation files per family
    ex_aldir = os.path.join(EXPPATH, "exp_aldir-pers")
    fams = [1, 4, 6, 8, 10, 11, 13, 14]
    per_family = ("current.{}.gen", "current.{}.prt", "current.{}.miss.lst",
                  "mafft-align.{}.aln", "mafft-prt2nuc.{}.aln")
    for family in fams:
        for template in per_family:
            basename = template.format(family)
            shutil.copyfile(os.path.join(ex_aldir, basename),
                            os.path.join(aldir, f"{dname}-{basename}"))
    # ... and the concatenated alignment
    shutil.copyfile(os.path.join(EXPPATH, "exp_pers4genome-complete.cat.aln"),
                    os.path.join(aldir, dname + "-complete.cat.aln"))

    # Fill treedir: alignment grouped by genome
    shutil.copyfile(os.path.join(EXPPATH, "exp_pers4genomes.grp.aln"),
                    os.path.join(treedir, dname + ".grp.aln"))

    # Run align module: 1 thread, no force
    al.main("cmd test_main_exist_ok", corepers, list_genomes, dname, dbpath, outdir, 1, False)

    # Read the detailed log: lines are joined with a space once, then searched
    logfile = os.path.join(outdir, "PanACoTA-align_TEST4exists.log.details")
    with open(logfile, "r") as details:
        joined_log = " ".join(details.readlines())

    # Output location as it is written (hard-coded) in the expected log messages
    logged_out = "test/data/align/generated_by_func_tests/test_main_exist_ok/"

    assert ("Reading PersGenome and constructing lists of missing genomes in "
            "each family") in joined_log
    for genome in genomes:
        reuse_entries = (
            f"For genome {genome}, "
            f"{logged_out}List-TEST4exists/TEST4exists-getEntry_prt_{genome}.txt and "
            f"{logged_out}List-TEST4exists/TEST4exists-getEntry_gen_{genome}.txt "
            "already exist. The program will use them to extract proteins and genes. "
            "If you prefer to rewrite them, use option -F "
        )
        assert reuse_entries in joined_log
    assert ("Starting alignment of all families: protein alignment, back-translation to "
            "nucleotides, and add missing genomes in the family") in joined_log
    for family in fams:
        assert f"Checking extractions for family {family}" in joined_log
        assert (f"Alignment already done for family {family}. The program will use it "
                "for next steps") in joined_log
    assert ("All extraction files already existing (see detailed log for more "
            "information)") in joined_log
    assert ("All prt and gene files for all families already exist. The program will use them "
            "for the next step. If you want to re-extract a given family, remove its prt and "
            "gen extraction files. If you want to re-extract all families, use option -F "
            "(or --force).") in joined_log
    reuse_concat = (
        "Alignments already concatenated in "
        f"{logged_out}Align-TEST4exists/TEST4exists-complete.cat.aln. "
        "Program will use it for next steps. "
        "If you want to redo it, remove it before running."
    )
    assert reuse_concat in joined_log
    reuse_grouped = (
        "Alignments already grouped by genome in "
        f"{logged_out}Phylo-TEST4exists/TEST4exists.grp.aln. "
        "Program will end."
    )
    assert reuse_grouped in joined_log
    assert "END" in joined_log
grp must still be empty """ corepers = os.path.join(TESTPATH, "test_pers0.99FX.lst") list_genomes = os.path.join("test", "data", "pangenome", "test_files", "list_to_pan.txt") @@ -262,6 +262,8 @@ def test_main_exist_emptygrp(capsys): open(outgrp, "w").close() # Run align module al.main(cmd, corepers, list_genomes, dname, dbpath, outdir, threads, force, verbose=2) + # Check grp still empty + assert os.stat(outgrp).st_size == 0 # Check logs out, err = capsys.readouterr() logfile = os.path.join(outdir, "PanACoTA-align_TEST4empty-grp.log.details") @@ -355,6 +357,11 @@ def test_main_exist_emptycat(capsys): outf.write("It's me !!") # Run align module al.main(cmd, corepers, list_genomes, dname, dbpath, outdir, threads, force, verbose=16) + # Check concat and grp did not change + with open(outcat, "r") as of: + assert of.readlines() == ["Hello !!"] + with open(outgrp, "r") as of: + assert of.readlines() == ["It's me !!"] # Check logs out, err = capsys.readouterr() logfile = os.path.join(outdir, "PanACoTA-align_TEST4empty-cat.log.details") diff --git a/test/test_unit/test_align/test_postalign.py b/test/test_unit/test_align/test_postalign.py index b921e3f42aa6b4fb510e5b18c120d3bd3a1bbbd6..32b41dca205af6b5a916ce1a180d24a74e4600b8 100755 --- a/test/test_unit/test_align/test_postalign.py +++ b/test/test_unit/test_align/test_postalign.py @@ -201,7 +201,7 @@ def test_launch_gbg(caplog): open(out_grp, "w").close() dname = "TESTlaunch" quiet = False - assert pal.launch_group_by_genome(all_genomes, alnfile, status, treedir, dname, quiet) is True + assert pal.launch_group_by_genome(all_genomes, alnfile, status, out_grp, dname, quiet) is True exp_grp = os.path.join(EXPPATH, "exp_pers4genomes.grp.aln") assert tutil.compare_order_content(out_grp, exp_grp) assert "Grouping alignments per genome" in caplog.text @@ -224,7 +224,7 @@ def test_launch_gbg_existsempty(caplog): open(out_grp, "w").close() dname = "TESTlaunch" quiet = False - assert pal.launch_group_by_genome(all_genomes, alnfile, 
status, treedir, dname, quiet) is True + assert pal.launch_group_by_genome(all_genomes, alnfile, status, out_grp, dname, quiet) is True assert "Grouping alignments per genome" not in caplog.text with open(out_grp, "r") as outf: assert outf.readlines() == [] @@ -246,7 +246,7 @@ def test_launch_gbg_ok_notexist(caplog): out_grp = os.path.join(treedir, "TESTlaunch.grp.aln") dname = "TESTlaunch" quiet = False - assert pal.launch_group_by_genome(all_genomes, alnfile, status, treedir, dname, quiet) is True + assert pal.launch_group_by_genome(all_genomes, alnfile, status, out_grp, dname, quiet) is True exp_grp = os.path.join(EXPPATH, "exp_pers4genomes.grp.aln") assert tutil.compare_order_content(out_grp, exp_grp) assert "Grouping alignments per genome" in caplog.text