From 8cdec6cfd61d5c2712e0935be58935ed637d70a1 Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Mon, 14 Sep 2020 14:40:14 +0200 Subject: [PATCH] Test for generation of new contig names -> corrected error in logs --- PanACoTA/utils.py | 7 +++++- test/test_unit/test_utils.py | 44 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/PanACoTA/utils.py b/PanACoTA/utils.py index 04969397..4d51eecf 100755 --- a/PanACoTA/utils.py +++ b/PanACoTA/utils.py @@ -1150,6 +1150,10 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile, logger): prevcont_nohead = prev_cont.split(">")[1] prev_orig_name_nohead = prev_orig_name.split(">")[1] if prev_orig_name_nohead: + if prev_orig_name_nohead in contigs: + logger.error(f"several contigs have the same name " + f"{prev_orig_name_nohead} in {gpath}.") + return False, False sizes[prevcont_nohead] = cont_size contigs[prev_orig_name_nohead] = prevcont_nohead grf.write(cont) @@ -1170,7 +1174,8 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile, logger): prev_orig_name_nohead = prev_orig_name.split(">")[1] if prev_orig_name_nohead: if prev_orig_name_nohead in contigs: - logger.error(f"several contigs have the same name {prev_cont} in {gpath}.") + logger.error(f"several contigs have the same name {prev_orig_name_nohead} " + f"in {gpath}.") return False, False contigs[prev_orig_name_nohead] = prevcont_nohead sizes[prevcont_nohead] = cont_size diff --git a/test/test_unit/test_utils.py b/test/test_unit/test_utils.py index 57fffe61..a1a79707 100755 --- a/test/test_unit/test_utils.py +++ b/test/test_unit/test_utils.py @@ -918,6 +918,50 @@ def test_rename_contigs(): assert utilities.compare_order_content(outfile, exp_file) +def test_rename_contigs_duplicate(caplog): + """ + From a given sequence, there are 2 contigs named "contig2". Stops and returns false + """ + logger = logging.getLogger("default") + gpath = os.path.join(DATA_DIR, "genomes", "genome-duplicated-header.fasta") + gembase_name = "ESCO.0216.00005" + outfile = os.path.join(GENEPATH, "genome_dup_error.fna") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_H299_H561-ESCO00005.fna") + contigs, sizes = utils.get_genome_contigs_and_rename(gembase_name, gpath, outfile, logger) + assert not contigs + assert not sizes + with open(outfile, "r") as of: + assert of.readline().startswith(">ESCO.0216.00005.0001") + of.readline() # skip sequence + assert of.readline().startswith(">ESCO.0216.00005.0002") + of.readline() # skip sequence + assert of.readline().startswith(">ESCO.0216.00005.0003") + assert ("several contigs have the same name contig2 in test/data/annotate/genomes/" + "genome-duplicated-header.fasta.") in caplog.text + + +def test_rename_contigs_duplicate_last(caplog): + """ + The last contig of the sequence has the same name as a previous contig. Stops and returns false + """ + logger = logging.getLogger("default") + gpath = os.path.join(DATA_DIR, "genomes", "genome-duplicated-header-last.fasta") + gembase_name = "ESCO.0216.00005" + outfile = os.path.join(GENEPATH, "genome_dup_error.fna") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_H299_H561-ESCO00005.fna") + contigs, sizes = utils.get_genome_contigs_and_rename(gembase_name, gpath, outfile, logger) + assert not contigs + assert not sizes + with open(outfile, "r") as of: + assert of.readline().startswith(">ESCO.0216.00005.0001") + of.readline() # skip sequence + assert of.readline().startswith(">ESCO.0216.00005.0002") + of.readline() # skip sequence + assert of.readline().startswith(">ESCO.0216.00005.0003") + assert ("several contigs have the same name contig2 in test/data/annotate/genomes/" + "genome-duplicated-header-last.fasta.") in caplog.text + + def test_cat_nobar(): """ Check that when cat is called on a list of several files, the output file -- GitLab