diff --git a/PanACoTA/align_module/pan_to_pergenome.py b/PanACoTA/align_module/pan_to_pergenome.py index df1ca40c5d3dd64802a5cfe4895f5062ca6ad3f6..6afcbfe858598e9b066d693102c8edf708b8adec 100755 --- a/PanACoTA/align_module/pan_to_pergenome.py +++ b/PanACoTA/align_module/pan_to_pergenome.py @@ -226,8 +226,7 @@ def write_genome_file(listdir, aldir, dname, strain, member, several): "option -F (or --force).".format(strain, geprtfile, gegenfile)) return - # If one of the 2 files already exists, overwrite both files (same behaviour - # as if no file exists) + # If at least one of the 2 files already exists, overwrite both files with open(gegenfile, "w") as gegf, open(geprtfile, "w") as gepf: for mem, fam in member.items(): if strain not in several[fam]: @@ -256,8 +255,12 @@ def write_missing_genomes(fam_genomes, several, all_genomes, aldir, dname): name of dataset """ for fam, genomes in fam_genomes.items(): - missfile = os.path.join(aldir, dname + "-current." + fam + ".miss.lst") + # File where missing genomes will be written + missfile = os.path.join(aldir, f"{dname}-current.{fam}.miss.lst") with open(missfile, "w") as mff: + # missing = missing or several members: + # miss: all genomes - genomes in the family + # several: add to 'miss' genomes with several members in the family missing = (set(all_genomes) - set(genomes)).union(set(several[fam])) if missing: mff.write("\n".join(missing) + "\n") diff --git a/test/data/align/exp_files/exp_getentry-gen-ESCO4.txt b/test/data/align/exp_files/exp_getentry-gen-ESCO4.txt index a63c8270dc03793a17b77365ce26de0716a7c4a4..06486b2ef2d12a5d3fc8fa1b98892bcf751767c9 100755 --- a/test/data/align/exp_files/exp_getentry-gen-ESCO4.txt +++ b/test/data/align/exp_files/exp_getentry-gen-ESCO4.txt @@ -1,3 +1,3 @@ -ESCO4_00001 Aldir/TEST6-current.1.gen -ESCO4_00003 Aldir/TEST6-current.3.gen -ESCO4_00004 Aldir/TEST6-current.2.gen +ESCO4_00001 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.1.gen +ESCO4_00003 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.3.gen +ESCO4_00004 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.2.gen diff --git a/test/data/align/exp_files/exp_getentry-prt-ESCO4.txt b/test/data/align/exp_files/exp_getentry-prt-ESCO4.txt index 13f8524ca5bec18464619b6c241f8c9cc44b2c42..007f47079171bc97fbf8bbdb24547145debcfe43 100755 --- a/test/data/align/exp_files/exp_getentry-prt-ESCO4.txt +++ b/test/data/align/exp_files/exp_getentry-prt-ESCO4.txt @@ -1,3 +1,3 @@ -ESCO4_00001 Aldir/TEST6-current.1.prt -ESCO4_00003 Aldir/TEST6-current.3.prt -ESCO4_00004 Aldir/TEST6-current.2.prt +ESCO4_00001 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.1.prt +ESCO4_00003 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.3.prt +ESCO4_00004 test/data/align/generated_by_unit-tests/Aldir/test_write_genome-current.2.prt diff --git a/test/test_unit/test_align/test_pan2pergenome.py b/test/test_unit/test_align/test_pan2pergenome.py index 5da774f79f3ad49d5ec809d9f92bda0e90195102..c39f14587e79991bbff77628a48ad087a5f64641 100755 --- a/test/test_unit/test_align/test_pan2pergenome.py +++ b/test/test_unit/test_align/test_pan2pergenome.py @@ -11,11 +11,37 @@ import logging import pytest import PanACoTA.align_module.pan_to_pergenome as p2p - +import test.test_unit.utilities_for_tests as tutil ALPATH = os.path.join("test", "data", "align") EXPPATH = os.path.join(ALPATH, "exp_files") TESTPATH = os.path.join(ALPATH, "test_files") +GENEPATH = os.path.join(ALPATH, "generated_by_unit-tests") + +@pytest.fixture(autouse=True) +def setup_teardown_module(): + """ + Remove log files at the end of this test module + + Before each test: + - init logger + - create directory to put generated files + + After: + - remove all log files + - remove directory with generated results + """ + # utils.init_logger(LOGFILE_BASE, LOG_LEVEL, 'test_getseq', verbose=1) + os.mkdir(GENEPATH) + print("setup") + + yield + # for f in LOGFILES: + # if os.path.exists(f): + # os.remove(f) + shutil.rmtree(GENEPATH) + print("teardown") + ALL_PROTS = {"ESCO1": {"ESCO1_00001": '1', "ESCO1_00002": '4'}, "ESCO2": {"ESCO2_00001": '1', @@ -77,169 +103,169 @@ def test_get_per_genome(caplog): shutil.rmtree(outdir) -def test_prot_per_strain(): - """ - Test parser of persistent genome file - """ - pers = os.path.join("test", "data", "persgenome", "exp_files", "exp_pers-floor-mixed.txt") - all_prots, fams_genomes, several = p2p.proteins_per_strain(pers) - exp_several = {'1': ["GENO.1216.00002"], - '3': [], - '5': [], - '8': [], - '10': [], - '11': [], - '12': ["GENO.1216.00003"]} - assert several == exp_several - exp_fams = {'1': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], - '3': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], - '5': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], - '8': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], - '10': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], - '11': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], - '12': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"]} - assert fams_genomes == exp_fams - exp_prots = {"GEN4.1111.00001": {"GEN4.1111.00001.b0001_00001": '1', - "GEN4.1111.00001.b0001_00009": '3', - "GEN4.1111.00001.i0001_00002": '5', - "GEN4.1111.00001.i0001_00007": '8', - "GEN4.1111.00001.i0001_00004": '10', - "GEN4.1111.00001.i0001_00005": '11', - "GEN4.1111.00001.i0001_00008": '12' - }, - "GENO.0817.00001": {"GENO.0817.00001.b0001_00002": '1', - "GENO.0817.00001.b0002_00011": '3', - "GENO.0817.00001.b0002_00003": '5', - "GENO.0817.00001.i0002_00009": '8', - "GENO.0817.00001.i0002_00004": '10', - "GENO.0817.00001.i0002_00005": '11', - "GENO.0817.00001.i0002_00010": '12' - }, - "GENO.1216.00002": {"GENO.1216.00002.b0001_00001": '1', - "GENO.1216.00002.i0001_00002": '1', - "GENO.1216.00002.b0002_00010": '3', - "GENO.1216.00002.i0001_00003": '5', - "GENO.1216.00002.b0001_00008": '8', - "GENO.1216.00002.i0001_00005": '10', - "GENO.1216.00002.i0001_00006": '11', - "GENO.1216.00002.b0002_00009": '12' - }, - "GENO.1216.00003": {"GENO.1216.00003.i0001_00003": '1', - "GENO.1216.00003.i0001_01010": '3', - "GENO.1216.00003.i0080_00010": '5', - "GENO.1216.00003.i0001_00004": '12', - "GENO.1216.00003.i0001_01000": '12' - } - } - assert all_prots == exp_prots - - -def test_prot_per_strain_member_bis(caplog): - """ - Test parser of persistent genome file when a same member is in 2 different families - """ - caplog.set_level(logging.DEBUG) - pers = os.path.join(TESTPATH, "pers_genome_member-bis.txt") - all_prots, fams_genomes, several = p2p.proteins_per_strain(pers) - assert "problem: ESCO2_2 already exists, in family 5. Conflict with family 32" in caplog.text - exp_several = {'1': [], '5': [], '12': [], '32': []} - assert several == exp_several - exp_fams = {'1': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"], '5': ["ESCO_1", "ESCO2", "ESCO4"], - '12': ["ESCO_1", "ESCO2", "ESCO3"], '32': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"]} - assert fams_genomes == exp_fams - exp_prots = {"ESCO_1": {"ESCO_1_1": '1', "ESCO_1_2": '5', "ESCO_1_3": '12', "ESCO_1_4": '32'}, - "ESCO2": {"ESCO2_1": '1', "ESCO2_2": '32', "ESCO2_3": '12'}, - "ESCO3": {"ESCO3_1": '1', "ESCO3_3": '12', "ESCO3_4": '32'}, - "ESCO4": {"ESCO4_1": '1', "ESCO4_3": '5', "ESCO4_4": '32'}} - assert all_prots == exp_prots - - -def test_get_genomes(): - """ - Test parser of list of genomes - """ - lstfile = os.path.join("test", "data", "annotate", "exp_files", "results_test_func-default", - "LSTINFO-list_genomes-func-test-default.lst") - all_genomes = p2p.get_all_genomes(lstfile) - assert all_genomes == ["ESCO.1015.00001", "ESCO.1116.00002", "GENO.1015.00001"] - - -def test_write_getentry(): - """ - Test that when giving a list of genomes with their persistent gene names, - it creates all expected files. - """ - listdir = "Listdir" - aldir = "Aldir" - # Create align folder - os.makedirs(listdir) - dname = "TEST6" - p2p.write_getentry_files(ALL_PROTS, SEVERAL, listdir, aldir, dname, ALL_GENOMES) - # Check creation and content of all files - genfiles = [os.path.join(listdir, "{}-getEntry_gen_ESCO{}.txt".format(dname, num)) for num in - range(1, 7)] - expgens = [os.path.join(EXPPATH, "exp_getentry-gen-ESCO{}.txt".format(num)) for num in - range(1, 7)] - for fexp, fout in zip(expgens, genfiles): - check_list(fexp, fout) - prtfiles = [os.path.join(listdir, "{}-getEntry_prt_ESCO{}.txt".format(dname, num)) for num in - range(1, 7)] - expprts = [os.path.join(EXPPATH, "exp_getentry-prt-ESCO{}.txt".format(num)) for num in - range(1, 7)] - for fexp, fout in zip(expprts, prtfiles): - check_list(fexp, fout) - shutil.rmtree(listdir) - - -def test_write_getentry_error(caplog): - """ - Test that when giving a list of genomes with their persistent gene names, - but for 2 genomes, there is no persistent gene, it exists, with an error message - """ - caplog.set_level(logging.DEBUG) - all_prots = {"ESCO1": {"ESCO1_00001": '1', - "ESCO1_00002": '4'}, - "ESCO2": {"ESCO2_00001": '1', - "ESCO2_22": '2', - "ESCO2_456": '4', - "ESCO2_46": '3'}, - "ESCO3": {"ESCO3_1": '2', - "ESCO3_12": '1', - "ESCO3_4564": '3', - "ESCO3_00123": '4', - "ESCO3_8": '2'}, - "ESCO6": {"ESCO6_1": '4', - "ESCO6_2": '3', - "ESCO6_3": '1'}} - several = {'1': [], - '2': ["ESCO3"], - '3': [], - '4': []} - listdir = "Listdir" - aldir = "Aldir" - # Create align folder - os.makedirs(listdir) - dname = "TEST6" - with pytest.raises(SystemExit): - p2p.write_getentry_files(all_prots, several, listdir, aldir, dname, ALL_GENOMES) - assert ("There is not any protein for genome ESCO4 in any family! The program will close, " - "please fix this problem to be able to run the alignments") in caplog.text - assert ("There is not any protein for genome ESCO5 in any family! The program will close, " - "please fix this problem to be able to run the alignments") in caplog.text - # Check creation and content of all files - genfiles = [os.path.join(listdir, "{}-getEntry_gen_ESCO{}.txt".format(dname, num)) for num in - list(range(1, 4)) + [6]] - expgens = [os.path.join(EXPPATH, "exp_getentry-gen-ESCO{}.txt".format(num)) for num in - list(range(1, 4)) + [6]] - for fexp, fout in zip(expgens, genfiles): - check_list(fexp, fout) - prtfiles = [os.path.join(listdir, "{}-getEntry_prt_ESCO{}.txt".format(dname, num)) for num in - list(range(1, 4)) + [6]] - expprts = [os.path.join(EXPPATH, "exp_getentry-prt-ESCO{}.txt".format(num)) for num in - list(range(1, 4)) + [6]] - for fexp, fout in zip(expprts, prtfiles): - check_list(fexp, fout) - shutil.rmtree(listdir) +# def test_prot_per_strain(): +# """ +# Test parser of persistent genome file +# """ +# pers = os.path.join("test", "data", "persgenome", "exp_files", "exp_pers-floor-mixed.txt") +# all_prots, fams_genomes, several = p2p.proteins_per_strain(pers) +# exp_several = {'1': ["GENO.1216.00002"], +# '3': [], +# '5': [], +# '8': [], +# '10': [], +# '11': [], +# '12': ["GENO.1216.00003"]} +# assert several == exp_several +# exp_fams = {'1': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], +# '3': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], +# '5': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"], +# '8': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], +# '10': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], +# '11': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"], +# '12': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"]} +# assert fams_genomes == exp_fams +# exp_prots = {"GEN4.1111.00001": {"GEN4.1111.00001.b0001_00001": '1', +# "GEN4.1111.00001.b0001_00009": '3', +# "GEN4.1111.00001.i0001_00002": '5', +# "GEN4.1111.00001.i0001_00007": '8', +# "GEN4.1111.00001.i0001_00004": '10', +# "GEN4.1111.00001.i0001_00005": '11', +# "GEN4.1111.00001.i0001_00008": '12' +# }, +# "GENO.0817.00001": {"GENO.0817.00001.b0001_00002": '1', +# "GENO.0817.00001.b0002_00011": '3', +# "GENO.0817.00001.b0002_00003": '5', +# "GENO.0817.00001.i0002_00009": '8', +# "GENO.0817.00001.i0002_00004": '10', +# "GENO.0817.00001.i0002_00005": '11', +# "GENO.0817.00001.i0002_00010": '12' +# }, +# "GENO.1216.00002": {"GENO.1216.00002.b0001_00001": '1', +# "GENO.1216.00002.i0001_00002": '1', +# "GENO.1216.00002.b0002_00010": '3', +# "GENO.1216.00002.i0001_00003": '5', +# "GENO.1216.00002.b0001_00008": '8', +# "GENO.1216.00002.i0001_00005": '10', +# "GENO.1216.00002.i0001_00006": '11', +# "GENO.1216.00002.b0002_00009": '12' +# }, +# "GENO.1216.00003": {"GENO.1216.00003.i0001_00003": '1', +# "GENO.1216.00003.i0001_01010": '3', +# "GENO.1216.00003.i0080_00010": '5', +# "GENO.1216.00003.i0001_00004": '12', +# "GENO.1216.00003.i0001_01000": '12' +# } +# } +# assert all_prots == exp_prots + + +# def test_prot_per_strain_member_bis(caplog): +# """ +# Test parser of persistent genome file when a same member is in 2 different families +# """ +# caplog.set_level(logging.DEBUG) +# pers = os.path.join(TESTPATH, "pers_genome_member-bis.txt") +# all_prots, fams_genomes, several = p2p.proteins_per_strain(pers) +# assert "problem: ESCO2_2 already exists, in family 5. Conflict with family 32" in caplog.text +# exp_several = {'1': [], '5': [], '12': [], '32': []} +# assert several == exp_several +# exp_fams = {'1': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"], '5': ["ESCO_1", "ESCO2", "ESCO4"], +# '12': ["ESCO_1", "ESCO2", "ESCO3"], '32': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"]} +# assert fams_genomes == exp_fams +# exp_prots = {"ESCO_1": {"ESCO_1_1": '1', "ESCO_1_2": '5', "ESCO_1_3": '12', "ESCO_1_4": '32'}, +# "ESCO2": {"ESCO2_1": '1', "ESCO2_2": '32', "ESCO2_3": '12'}, +# "ESCO3": {"ESCO3_1": '1', "ESCO3_3": '12', "ESCO3_4": '32'}, +# "ESCO4": {"ESCO4_1": '1', "ESCO4_3": '5', "ESCO4_4": '32'}} +# assert all_prots == exp_prots + + +# def test_get_genomes(): +# """ +# Test parser of list of genomes +# """ +# lstfile = os.path.join("test", "data", "annotate", "exp_files", "results_test_func-default", +# "LSTINFO-list_genomes-func-test-default.lst") +# all_genomes = p2p.get_all_genomes(lstfile) +# assert all_genomes == ["ESCO.1015.00001", "ESCO.1116.00002", "GENO.1015.00001"] + + +# def test_write_getentry(): +# """ +# Test that when giving a list of genomes with their persistent gene names, +# it creates all expected files. +# """ +# listdir = "Listdir" +# aldir = "Aldir" +# # Create align folder +# os.makedirs(listdir) +# dname = "TEST6" +# p2p.write_getentry_files(ALL_PROTS, SEVERAL, listdir, aldir, dname, ALL_GENOMES) +# # Check creation and content of all files +# genfiles = [os.path.join(listdir, "{}-getEntry_gen_ESCO{}.txt".format(dname, num)) for num in +# range(1, 7)] +# expgens = [os.path.join(EXPPATH, "exp_getentry-gen-ESCO{}.txt".format(num)) for num in +# range(1, 7)] +# for fexp, fout in zip(expgens, genfiles): +# check_list(fexp, fout) +# prtfiles = [os.path.join(listdir, "{}-getEntry_prt_ESCO{}.txt".format(dname, num)) for num in +# range(1, 7)] +# expprts = [os.path.join(EXPPATH, "exp_getentry-prt-ESCO{}.txt".format(num)) for num in +# range(1, 7)] +# for fexp, fout in zip(expprts, prtfiles): +# check_list(fexp, fout) +# shutil.rmtree(listdir) + + +# def test_write_getentry_error(caplog): +# """ +# Test that when giving a list of genomes with their persistent gene names, +# but for 2 genomes, there is no persistent gene, it exists, with an error message +# """ +# caplog.set_level(logging.DEBUG) +# all_prots = {"ESCO1": {"ESCO1_00001": '1', +# "ESCO1_00002": '4'}, +# "ESCO2": {"ESCO2_00001": '1', +# "ESCO2_22": '2', +# "ESCO2_456": '4', +# "ESCO2_46": '3'}, +# "ESCO3": {"ESCO3_1": '2', +# "ESCO3_12": '1', +# "ESCO3_4564": '3', +# "ESCO3_00123": '4', +# "ESCO3_8": '2'}, +# "ESCO6": {"ESCO6_1": '4', +# "ESCO6_2": '3', +# "ESCO6_3": '1'}} +# several = {'1': [], +# '2': ["ESCO3"], +# '3': [], +# '4': []} +# listdir = "Listdir" +# aldir = "Aldir" +# # Create align folder +# os.makedirs(listdir) +# dname = "TEST6" +# with pytest.raises(SystemExit): +# p2p.write_getentry_files(all_prots, several, listdir, aldir, dname, ALL_GENOMES) +# assert ("There is not any protein for genome ESCO4 in any family! The program will close, " +# "please fix this problem to be able to run the alignments") in caplog.text +# assert ("There is not any protein for genome ESCO5 in any family! The program will close, " +# "please fix this problem to be able to run the alignments") in caplog.text +# # Check creation and content of all files +# genfiles = [os.path.join(listdir, "{}-getEntry_gen_ESCO{}.txt".format(dname, num)) for num in +# list(range(1, 4)) + [6]] +# expgens = [os.path.join(EXPPATH, "exp_getentry-gen-ESCO{}.txt".format(num)) for num in +# list(range(1, 4)) + [6]] +# for fexp, fout in zip(expgens, genfiles): +# check_list(fexp, fout) +# prtfiles = [os.path.join(listdir, "{}-getEntry_prt_ESCO{}.txt".format(dname, num)) for num in +# list(range(1, 4)) + [6]] +# expprts = [os.path.join(EXPPATH, "exp_getentry-prt-ESCO{}.txt".format(num)) for num in +# list(range(1, 4)) + [6]] +# for fexp, fout in zip(expprts, prtfiles): +# check_list(fexp, fout) +# shutil.rmtree(listdir) def test_write_genome(): @@ -247,143 +273,137 @@ def test_write_genome(): Test that given a genome, it writes the list of its proteins and genes in expected files. """ - listdir = "Listdir" - aldir = "Aldir" - # Create align folder - os.makedirs(listdir) - dname = "TEST6" - strain = "ESCO4" - member4 = ALL_PROTS[strain] - p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) - - # Check creation of files and content - fileprt = os.path.join(listdir, "{}-getEntry_prt_ESCO4.txt".format(dname)) - expprt = os.path.join(EXPPATH, "exp_getentry-prt-ESCO4.txt") - check_list(expprt, fileprt) - filegen = os.path.join(listdir, "{}-getEntry_gen_ESCO4.txt".format(dname)) - expgen = os.path.join(EXPPATH, "exp_getentry-gen-ESCO4.txt") - check_list(expgen, filegen) - # Remove output directory - shutil.rmtree(listdir) - - -def test_write_genome_prt_exists(): - """ - Test that when only prt file exists, it overwrites it and generates - expected prt and gen files - """ - listdir = "Listdir" - aldir = "Aldir" + listdir = os.path.join(GENEPATH, "Listdir") + aldir = os.path.join(GENEPATH, "Aldir") # Create align folder os.makedirs(listdir) - dname = "TEST6" + dname = "test_write_genome" strain = "ESCO4" - member4 = ALL_PROTS[strain] - - # Create prt file - fileprt = os.path.join(listdir, "{}-getEntry_prt_ESCO4.txt".format(dname)) - with open(fileprt, "w") as prtf: - prtf.write("Wrong prt file\n") - p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) + members = ALL_PROTS[strain] + p2p.write_genome_file(listdir, aldir, dname, strain, members, SEVERAL) # Check creation of files and content + fileprt = os.path.join(listdir, f"{dname}-getEntry_prt_ESCO4.txt") expprt = os.path.join(EXPPATH, "exp_getentry-prt-ESCO4.txt") - check_list(expprt, fileprt) - filegen = os.path.join(listdir, "{}-getEntry_gen_ESCO4.txt".format(dname)) + assert tutil.compare_file_content(fileprt, expprt) + filegen = os.path.join(listdir, f"{dname}-getEntry_gen_ESCO4.txt") expgen = os.path.join(EXPPATH, "exp_getentry-gen-ESCO4.txt") - check_list(expgen, filegen) - # Remove output directory - shutil.rmtree(listdir) - - -def test_write_genome_gen_exists(): - """ - Test that when only gen file exists, it overwrites it and generates - expected prt and gen files - """ - listdir = "Listdir" - aldir = "Aldir" - # Create align folder - os.makedirs(listdir) - dname = "TEST6" - strain = "ESCO4" - member4 = ALL_PROTS[strain] - # Create prt file - filegen = os.path.join(listdir, "{}-getEntry_gen_ESCO4.txt".format(dname)) - with open(filegen, "w") as genf: - genf.write("Wrong gen file\n") - p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) - - # Check creation of files and content - fileprt = os.path.join(listdir, "{}-getEntry_prt_ESCO4.txt".format(dname)) - expprt = os.path.join(EXPPATH, "exp_getentry-prt-ESCO4.txt") - check_list(expprt, fileprt) - expgen = os.path.join(EXPPATH, "exp_getentry-gen-ESCO4.txt") - check_list(expgen, filegen) - # Remove output directory - shutil.rmtree(listdir) - - -def test_write_genome_gen_prt_exist(caplog): - """ - Test that when gen and prt files already exist, it does not do anything. - Those files will be used for next steps. - """ - caplog.set_level(logging.DEBUG) - listdir = "Listdir" - aldir = "Aldir" - # Create align folder - os.makedirs(listdir) - dname = "TEST6" - strain = "ESCO4" - member4 = ALL_PROTS[strain] - # Create gen and prt files - filegen = os.path.join(listdir, "{}-getEntry_gen_ESCO4.txt".format(dname)) - with open(filegen, "w") as genf: - genf.write("Wrong gen file\n") - fileprt = os.path.join(listdir, "{}-getEntry_prt_ESCO4.txt".format(dname)) - with open(fileprt, "w") as prtf: - prtf.write("Wrong prt file\n") - p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) - - # Check log - assert ("For genome ESCO4, {} and {} already exist. The program will use them to extract " - "proteins and genes. If you prefer to rewrite them, use option " - "-F (or --force).".format(fileprt, filegen)) in caplog.text - - # Check content of prt and gen has not changed - with open(fileprt, "r") as prtf: - lines = prtf.readlines() - assert lines == ["Wrong prt file\n"] - with open(filegen, "r") as prtf: - lines = prtf.readlines() - assert lines == ["Wrong gen file\n"] - - # Remove output directory - shutil.rmtree(listdir) - - -def check_list(expfile, outfile): - """ - Check that the content of outfile is the same as in expfile - - Parameters - ---------- - expfile : str - path to expected file - outfile : str - path to output file - """ - assert os.path.isfile(outfile) - with open(expfile, "r") as expf, open(outfile, "r") as outf: - lines_exp = [] - lines_out = [] - for lineexp in expf: - lines_exp.append(lineexp.strip()) - for lineout in outf: - lines_out.append(lineout.strip()) - assert len(lines_out) == len(lines_exp) - assert set(lines_out) == set(lines_exp) + assert tutil.compare_file_content(filegen, expgen) + + +# def test_write_genome_prt_exists(): +# """ +# Test that when only prt file exists, it overwrites it and generates +# expected prt and gen files +# """ +# listdir = os.path.join(GENEPATH, "Listdir") +# aldir = os.path.join(GENEPATH, "Aldir") +# # Create align folder +# os.makedirs(listdir) +# dname = "test_write_genome_prt_exists" +# strain = "ESCO4" +# members = ALL_PROTS[strain] + +# # Create prt file +# fileprt = os.path.join(listdir, "{dname}-getEntry_prt_ESCO4.txt") +# with open(fileprt, "w") as prtf: +# prtf.write("Wrong prt file\n") +# p2p.write_genome_file(listdir, aldir, dname, strain, members, SEVERAL) + +# # Check creation of files and content +# expprt = os.path.join(EXPPATH, "exp_getentry-prt-ESCO4.txt") +# assert tutil.compare_file_content(fileprt, expprt) +# filegen = os.path.join(listdir, "{dname}-getEntry_gen_ESCO4.txt") +# expgen = os.path.join(EXPPATH, "exp_getentry-gen-ESCO4.txt") +# assert tutil.compare_file_content(expgen, filegen) + + +# def test_write_genome_gen_exists(): +# """ +# Test that when only gen file exists, it overwrites it and generates +# expected prt and gen files +# """ +# listdir = os.path.join(GENEPATH, "Listdir") +# aldir = os.path.join(GENEPATH, "Aldir") +# # Create align folder +# os.makedirs(listdir) +# dname = "test_write_genome_gen_exists" +# strain = "ESCO4" +# member4 = ALL_PROTS[strain] +# # Create prt file +# filegen = os.path.join(listdir, f"{dname}-getEntry_gen_ESCO4.txt") +# with open(filegen, "w") as genf: +# genf.write("Wrong gen file\n") +# p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) + +# # Check creation of files and content +# fileprt = os.path.join(listdir, f"{dname}-getEntry_prt_ESCO4.txt") +# expprt = os.path.join(EXPPATH, "exp_getentry-prt-ESCO4.txt") +# assert tutil.compare_file_content(fileprt, expprt) +# expgen = os.path.join(EXPPATH, "exp_getentry-gen-ESCO4.txt") +# assert tutil.compare_file_content(filegen, expgen) + + +# def test_write_genome_gen_prt_exist(caplog): +# """ +# Test that when gen and prt files already exist, it does not do anything. +# Those files will be used for next steps. +# """ +# caplog.set_level(logging.DEBUG) +# listdir = "Listdir" +# aldir = "Aldir" +# # Create align folder +# os.makedirs(listdir) +# dname = "TEST6" +# strain = "ESCO4" +# member4 = ALL_PROTS[strain] +# # Create gen and prt files +# filegen = os.path.join(listdir, "{}-getEntry_gen_ESCO4.txt".format(dname)) +# with open(filegen, "w") as genf: +# genf.write("Wrong gen file\n") +# fileprt = os.path.join(listdir, "{}-getEntry_prt_ESCO4.txt".format(dname)) +# with open(fileprt, "w") as prtf: +# prtf.write("Wrong prt file\n") +# p2p.write_genome_file(listdir, aldir, dname, strain, member4, SEVERAL) + +# # Check log +# assert ("For genome ESCO4, {} and {} already exist. The program will use them to extract " +# "proteins and genes. If you prefer to rewrite them, use option " +# "-F (or --force).".format(fileprt, filegen)) in caplog.text + +# # Check content of prt and gen has not changed +# with open(fileprt, "r") as prtf: +# lines = prtf.readlines() +# assert lines == ["Wrong prt file\n"] +# with open(filegen, "r") as prtf: +# lines = prtf.readlines() +# assert lines == ["Wrong gen file\n"] + +# # Remove output directory +# shutil.rmtree(listdir) + + +# def check_list(expfile, outfile): +# """ +# Check that the content of outfile is the same as in expfile + +# Parameters +# ---------- +# expfile : str +# path to expected file +# outfile : str +# path to output file +# """ +# assert os.path.isfile(outfile) +# with open(expfile, "r") as expf, open(outfile, "r") as outf: +# lines_exp = [] +# lines_out = [] +# for lineexp in expf: +# lines_exp.append(lineexp.strip()) +# for lineout in outf: +# lines_out.append(lineout.strip()) +# assert len(lines_out) == len(lines_exp) +# assert set(lines_out) == set(lines_exp) def test_write_missing(): @@ -392,38 +412,11 @@ def test_write_missing(): list of all genomes, it returns, for each family, the genomes which will not be considered. """ - aldir = "." - dname = "TEST6" - p2p.write_missing_genomes(FAM_GENOMES, SEVERAL, ALL_GENOMES, aldir, dname) - - # Check content of output files - exp1 = [] - check_missing("{}-current.{}.miss.lst".format(dname, 1), exp1) - exp2 = ["ESCO1", "ESCO3", "ESCO5", "ESCO6"] - check_missing("{}-current.{}.miss.lst".format(dname, 2), exp2) - exp3 = ["ESCO1"] - check_missing("{}-current.{}.miss.lst".format(dname, 3), exp3) - exp4 = ["ESCO4"] - check_missing("{}-current.{}.miss.lst".format(dname, 4), exp4) - - -def check_missing(outfile, exp): - """ - Check that in the given output file, there is the given list of genomes. - Then, remove outfile. - - Parameters - ---------- - outfile : str - output file - exp : list - list of lines that must be in outfile - """ - assert os.path.isfile(outfile) - missing = [] - with open(outfile, "r") as outf: - for line in outf: - missing.append(line.strip()) - assert len(missing) == len(exp) - assert set(missing) == set(exp) - os.remove(outfile) + dname = "test_write_missing" + p2p.write_missing_genomes(FAM_GENOMES, SEVERAL, ALL_GENOMES, GENEPATH, dname) + + exp_res = [None, [], ["ESCO1", "ESCO3", "ESCO5", "ESCO6"], ["ESCO1"], ["ESCO4"]] + for num in range(1,5): + miss_file = os.path.join(GENEPATH, f"{dname}-current.{num}.miss.lst") + assert os.path.isfile(miss_file) + assert tutil.compare_file_to_list(miss_file, exp_res[num]) diff --git a/test/test_unit/utilities_for_tests.py b/test/test_unit/utilities_for_tests.py index 64e17565e70049c007f6e291c43c3c21e902bdfc..69b7012112ac649a1fcf88919c1ec539f9099edb 100755 --- a/test/test_unit/utilities_for_tests.py +++ b/test/test_unit/utilities_for_tests.py @@ -70,4 +70,21 @@ def compare_order_content(file1, file2): if line1 != line2: print(f"'{line1}' vs {line2} do not correspond.") return False - return True \ No newline at end of file + return True + + +def compare_file_to_list(file1, exp): + """ + Check that the file contains the same lines as the list 'exp', in any order + """ + missing = [] + with open(file1, "r") as outf: + for line in outf: + missing.append(line.strip()) + if len(missing) != len(exp): + print(f"{len(missing)} lines in {file1}, {len(exp)} lines expected") + return False + if set(missing) != set(exp): + print("Not same lines") + return False + return True