Skip to content
Snippets Groups Projects
Commit 854291cb authored by Amandine  PERRIN's avatar Amandine PERRIN
Browse files

Adapt tests for prokka format

parent 02d0667f
Branches
Tags
No related merge requests found
Pipeline #35945 failed
......@@ -52,7 +52,6 @@
product hypothetical protein
db_xref 12345
>Feature test.0417.00002.0005
>Feature test.0417.00002.0006
>Feature test.0417.00002.0007
3039279 3039355 tRNA
inference COORDINATES:profile:Aragorn:1.2
......
>Feature toto_1
287 787 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00001
product hypothetical protein
4416 6068 CDS
gene yiaD
inference ab initio prediction:Prodigal:2.6
inference similar to AA sequence:UniProtKB:P37665
locus_tag JGIKIPIJ_00005
product putative lipoprotein YiaD
12000 9000 CDS
EC_number 6.3.2.-
gene vgrG1
inference ab initio prediction:Prodigal:2.6
inference similar to AA sequence:UniProtKB:Q9KS45
locus_tag JGIKIPIJ_00008
product Actin cross-linking toxin VgrG1
>Feature toto_2
77 1237 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00009
product hypothetical protein
1279 2346 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00010
product hypothetical protein
2419 3000 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00011
product hypothetical protein
3500 4000 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00011
product hypothetical protein
5000 4632 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00011
product hypothetical protein
>Feature toto_3
2968265 296902 repeat_region
rpt_family CRISPR
score 13
3399 4538 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00013
product hypothetical protein
>Feature toto_4
4535 7888 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_00014
product hypothetical protein
db_xref 12345
>Feature toto_5
>Feature toto_7
3039279 3039355 tRNA
inference COORDINATES:profile:Aragorn:1.2
locus_tag PROKKA_02927
product tRNA-Met(cat)
3039389 3039465 tRNA
inference COORDINATES:profile:Aragorn:1.2
locus_tag PROKKA_02928
product tRNA-Met(cat)
7854 9491 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_03015
product hypothetical protein
9525 11285 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_03016
product hypothetical protein
2994578 2994971 repeat_region
rpt_family CRISPR
score 7
11249 12328 CDS
inference ab initio prediction:Prodigal:2.6
locus_tag JGIKIPIJ_03017
product hypothetical protein
......@@ -15,14 +15,13 @@ import PanACoTA.annotate_module.format_prokka as prokkafunc
import PanACoTA.utils as utils
import test.test_unit.utilities_for_tests as tutil
logger = logging.getLogger("test_prokka")
ANNOTEDIR = os.path.join("test", "data", "annotate")
EXP_ANNOTE = os.path.join(ANNOTEDIR, "exp_files")
TEST_ANNOTE = os.path.join(ANNOTEDIR, "test_files")
GENEPATH = os.path.join(ANNOTEDIR, "generated_by_unit-tests")
LOGFILE_BASE = os.path.join(GENEPATH, "logfile")
LOGFILES = [LOGFILE_BASE + ext for ext in [".log", ".log.debug", ".log.details", ".log.err"]]
@pytest.fixture(autouse=True)
def setup_teardown_module():
......@@ -37,19 +36,48 @@ def setup_teardown_module():
- remove all log files
- remove directory with generated results
"""
# utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1)
os.mkdir(GENEPATH)
utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1)
print("setup")
yield
# for f in LOGFILES:
# if os.path.exists(f):
# os.remove(f)
for f in LOGFILES:
if os.path.exists(f):
os.remove(f)
# shutil.rmtree(GENEPATH)
print("teardown")
def test_tbl_to_lst(caplog):
def test_tbl_to_lst_changed_names(caplog):
"""
Check that generated lstinfo file is as expected, when tbl2lst is called with
``changed_name=True`` on the 'prokka_out_tbl_changed-contnames.tbl' input
(presumably a tbl file whose contig names differ from the names expected in the
final output -- to be confirmed against tbl2lst).
The test tblfile contains the following aspects:
- gene in D strand (start < end)
- gene in C strand (start > end)
- CDS features (some with all info = ECnumber, gene name, product etc. ;
some with missing info)
- tRNA type
- repeat_region type (*2)
- contigs with more than 2 genes
- contig with only 2 genes (both 'b' loc)
- contig with 1 gene ('b' loc)
- contig without gene (should be skipped)
"""
caplog.set_level(logging.DEBUG)
logger = logging.getLogger("test_prokka")
tblfile = os.path.join(TEST_ANNOTE, "prokka_out_tbl_changed-contnames.tbl")
lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst")
contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50",
"test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50",
"test.0417.00002.0007\t50"]
name = "test.0417.00002"
assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=True)
# The generated file is compared to the same expected file ("res_tbl2lst.lst")
# as in the changed_name=False test.
exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst")
assert tutil.compare_order_content(exp_lst, lstfile)
def test_tbl_to_lst_not_changed_names(caplog):
"""
Check that generated lstinfo file is as expected, when the genome name is the same as
it already was in the genome given to prokka.
......@@ -66,13 +94,14 @@ def test_tbl_to_lst(caplog):
- contig without gene (should be skipped)
"""
caplog.set_level(logging.DEBUG)
logger = logging.getLogger("test_prokka")
tblfile = os.path.join(TEST_ANNOTE, "original_name.fna-prokkaRes", "prokka_out_for_test.tbl")
lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst")
contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50",
"test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50",
"test.0417.00002.0007\t50"]
name = "test.0417.00002"
assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name)
assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=False)
exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst")
assert tutil.compare_order_content(exp_lst, lstfile)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment