From 854291cb450558b2be86b8cce47f0c390b393ed8 Mon Sep 17 00:00:00 2001
From: Amandine PERRIN <amandine.perrin@pasteur.fr>
Date: Wed, 19 Aug 2020 13:28:21 +0200
Subject: [PATCH] Adapt tests for prokka format

---
 .../prokka_out_for_test.tbl                   |  1 -
 .../prokka_out_tbl_changed-contnames.tbl      | 78 +++++++++++++++++++
 .../test_annotate/test_format_prokka.py       | 47 ++++++++---
 3 files changed, 116 insertions(+), 10 deletions(-)
 create mode 100644 test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl

diff --git a/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl b/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl
index 9b9cb50c..1bc2b52b 100755
--- a/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl
+++ b/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl
@@ -52,7 +52,6 @@
 			product	hypothetical protein
 			db_xref	12345
 >Feature test.0417.00002.0005
->Feature test.0417.00002.0006
 >Feature test.0417.00002.0007
 3039279	3039355	tRNA
 			inference	COORDINATES:profile:Aragorn:1.2
diff --git a/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl b/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl
new file mode 100644
index 00000000..e8755cbc
--- /dev/null
+++ b/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl
@@ -0,0 +1,78 @@
+>Feature toto_1
+287	787	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00001
+			product	hypothetical protein
+4416	6068	CDS
+			gene	yiaD
+			inference	ab initio prediction:Prodigal:2.6
+			inference	similar to AA sequence:UniProtKB:P37665
+			locus_tag	JGIKIPIJ_00005
+			product	putative lipoprotein YiaD
+12000	9000	CDS
+			EC_number	6.3.2.-
+			gene	vgrG1
+			inference	ab initio prediction:Prodigal:2.6
+			inference	similar to AA sequence:UniProtKB:Q9KS45
+			locus_tag	JGIKIPIJ_00008
+			product	Actin cross-linking toxin VgrG1
+>Feature toto_2
+77	1237	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00009
+			product	hypothetical protein
+1279	2346	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00010
+			product	hypothetical protein
+2419	3000	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00011
+			product	hypothetical protein
+3500	4000	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00011
+			product	hypothetical protein
+5000	4632	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00011
+			product	hypothetical protein
+>Feature toto_3
+2968265	296902	repeat_region
+			rpt_family	CRISPR
+			score	13
+3399	4538	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00013
+			product	hypothetical protein
+>Feature toto_4
+4535	7888	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_00014
+			product	hypothetical protein
+			db_xref	12345
+>Feature toto_5
+>Feature toto_7
+3039279	3039355	tRNA
+			inference	COORDINATES:profile:Aragorn:1.2
+			locus_tag	PROKKA_02927
+			product	tRNA-Met(cat)
+3039389	3039465	tRNA
+			inference	COORDINATES:profile:Aragorn:1.2
+			locus_tag	PROKKA_02928
+			product	tRNA-Met(cat)
+7854	9491	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_03015
+			product	hypothetical protein
+9525	11285	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_03016
+			product	hypothetical protein
+2994578	2994971	repeat_region
+			rpt_family	CRISPR
+			score	7
+11249	12328	CDS
+			inference	ab initio prediction:Prodigal:2.6
+			locus_tag	JGIKIPIJ_03017
+			product	hypothetical protein
diff --git a/test/test_unit/test_annotate/test_format_prokka.py b/test/test_unit/test_annotate/test_format_prokka.py
index fa606818..0310f1c1 100644
--- a/test/test_unit/test_annotate/test_format_prokka.py
+++ b/test/test_unit/test_annotate/test_format_prokka.py
@@ -15,14 +15,13 @@ import PanACoTA.annotate_module.format_prokka as prokkafunc
 import PanACoTA.utils as utils
 import test.test_unit.utilities_for_tests as tutil
 
-logger = logging.getLogger("test_prokka")
-
-
 ANNOTEDIR = os.path.join("test", "data", "annotate")
 EXP_ANNOTE = os.path.join(ANNOTEDIR, "exp_files")
 TEST_ANNOTE = os.path.join(ANNOTEDIR, "test_files")
 GENEPATH = os.path.join(ANNOTEDIR, "generated_by_unit-tests")
 
+LOGFILE_BASE = os.path.join(GENEPATH, "logfile")
+LOGFILES = [LOGFILE_BASE + ext for ext in [".log", ".log.debug", ".log.details", ".log.err"]]
 
 @pytest.fixture(autouse=True)
 def setup_teardown_module():
@@ -37,19 +36,48 @@ def setup_teardown_module():
     - remove all log files
     - remove directory with generated results
     """
-    # utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1)
     os.mkdir(GENEPATH)
+    utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1)
     print("setup")
 
     yield
-    # for f in LOGFILES:
-    #     if os.path.exists(f):
-    #         os.remove(f)
+    for f in LOGFILES:
+        if os.path.exists(f):
+            os.remove(f)
     # shutil.rmtree(GENEPATH)
     print("teardown")
 
 
-def test_tbl_to_lst(caplog):
+def test_tbl_to_lst_changed_names(caplog):
+    """
+    Check that generated lstinfo file is as expected, when the contig names in the prokka tbl
+    file (toto_N) differ from the ones given in the 'contigs' list (changed_name=True).
+    The test tblfile contains the following aspects:
+    - gene in D strand (start < end)
+    - gene in C strand (start > end)
+    - CDS features (some with all info = ECnumber, gene name, product etc. ;
+    some with missing info)
+    - tRNA type
+    - repeat_region type (*2)
+    - contigs with more than 2 genes
+    - contig with only 2 genes (both 'b' loc)
+    - contig with 1 gene ('b' loc)
+    - contig without gene (should be skipped)
+    """
+    caplog.set_level(logging.DEBUG)
+    logger = logging.getLogger("test_prokka")
+    tblfile = os.path.join(TEST_ANNOTE, "prokka_out_tbl_changed-contnames.tbl")
+    lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst")
+    contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50",
+               "test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50",
+               "test.0417.00002.0007\t50"]
+    name = "test.0417.00002"
+    assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=True)
+    exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst")
+    assert tutil.compare_order_content(exp_lst, lstfile)
+
+
+def test_tbl_to_lst_not_changed_names(caplog):
     """
     Check that generated lstinfo file is as expected, when the genome name is the same as
     it already was in the genome given to prokka.
@@ -66,13 +94,14 @@ def test_tbl_to_lst(caplog):
     - contig without gene (should be skipped)
     """
     caplog.set_level(logging.DEBUG)
+    logger = logging.getLogger("test_prokka")
     tblfile = os.path.join(TEST_ANNOTE, "original_name.fna-prokkaRes", "prokka_out_for_test.tbl")
     lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst")
     contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50",
                "test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50",
                "test.0417.00002.0007\t50"]
     name = "test.0417.00002"
-    assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name)
+    assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=False)
     exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst")
     assert tutil.compare_order_content(exp_lst, lstfile)
 
-- 
GitLab