From 854291cb450558b2be86b8cce47f0c390b393ed8 Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Wed, 19 Aug 2020 13:28:21 +0200 Subject: [PATCH] Adapt tests for prokka format --- .../prokka_out_for_test.tbl | 1 - .../prokka_out_tbl_changed-contnames.tbl | 78 +++++++++++++++++++ .../test_annotate/test_format_prokka.py | 47 ++++++++--- 3 files changed, 116 insertions(+), 10 deletions(-) create mode 100644 test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl diff --git a/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl b/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl index 9b9cb50c..1bc2b52b 100755 --- a/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl +++ b/test/data/annotate/test_files/original_name.fna-prokkaRes/prokka_out_for_test.tbl @@ -52,7 +52,6 @@ product hypothetical protein db_xref 12345 >Feature test.0417.00002.0005 ->Feature test.0417.00002.0006 >Feature test.0417.00002.0007 3039279 3039355 tRNA inference COORDINATES:profile:Aragorn:1.2 diff --git a/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl b/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl new file mode 100644 index 00000000..e8755cbc --- /dev/null +++ b/test/data/annotate/test_files/prokka_out_tbl_changed-contnames.tbl @@ -0,0 +1,78 @@ +>Feature toto_1 +287 787 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00001 + product hypothetical protein +4416 6068 CDS + gene yiaD + inference ab initio prediction:Prodigal:2.6 + inference similar to AA sequence:UniProtKB:P37665 + locus_tag JGIKIPIJ_00005 + product putative lipoprotein YiaD +12000 9000 CDS + EC_number 6.3.2.- + gene vgrG1 + inference ab initio prediction:Prodigal:2.6 + inference similar to AA sequence:UniProtKB:Q9KS45 + locus_tag JGIKIPIJ_00008 + product Actin cross-linking toxin VgrG1 +>Feature toto_2 +77 1237 CDS + inference ab initio 
prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00009 + product hypothetical protein +1279 2346 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00010 + product hypothetical protein +2419 3000 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00011 + product hypothetical protein +3500 4000 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00011 + product hypothetical protein +5000 4632 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00011 + product hypothetical protein +>Feature toto_3 +2968265 296902 repeat_region + rpt_family CRISPR + score 13 +3399 4538 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00013 + product hypothetical protein +>Feature toto_4 +4535 7888 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_00014 + product hypothetical protein + db_xref 12345 +>Feature toto_5 +>Feature toto_7 +3039279 3039355 tRNA + inference COORDINATES:profile:Aragorn:1.2 + locus_tag PROKKA_02927 + product tRNA-Met(cat) +3039389 3039465 tRNA + inference COORDINATES:profile:Aragorn:1.2 + locus_tag PROKKA_02928 + product tRNA-Met(cat) +7854 9491 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_03015 + product hypothetical protein +9525 11285 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_03016 + product hypothetical protein +2994578 2994971 repeat_region + rpt_family CRISPR + score 7 +11249 12328 CDS + inference ab initio prediction:Prodigal:2.6 + locus_tag JGIKIPIJ_03017 + product hypothetical protein diff --git a/test/test_unit/test_annotate/test_format_prokka.py b/test/test_unit/test_annotate/test_format_prokka.py index fa606818..0310f1c1 100644 --- a/test/test_unit/test_annotate/test_format_prokka.py +++ b/test/test_unit/test_annotate/test_format_prokka.py @@ -15,14 +15,13 @@ import PanACoTA.annotate_module.format_prokka as prokkafunc import PanACoTA.utils as utils import 
test.test_unit.utilities_for_tests as tutil -logger = logging.getLogger("test_prokka") - - ANNOTEDIR = os.path.join("test", "data", "annotate") EXP_ANNOTE = os.path.join(ANNOTEDIR, "exp_files") TEST_ANNOTE = os.path.join(ANNOTEDIR, "test_files") GENEPATH = os.path.join(ANNOTEDIR, "generated_by_unit-tests") +LOGFILE_BASE = os.path.join(GENEPATH, "logfile") +LOGFILES = [LOGFILE_BASE + ext for ext in [".log", ".log.debug", ".log.details", ".log.err"]] @pytest.fixture(autouse=True) def setup_teardown_module(): @@ -37,19 +36,48 @@ def setup_teardown_module(): - remove all log files - remove directory with generated results """ - # utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1) os.mkdir(GENEPATH) + utils.init_logger(LOGFILE_BASE, 0, 'test_fastme', verbose=1) print("setup") yield - # for f in LOGFILES: - # if os.path.exists(f): - # os.remove(f) + for f in LOGFILES: + if os.path.exists(f): + os.remove(f) # shutil.rmtree(GENEPATH) print("teardown") -def test_tbl_to_lst(caplog): +def test_tbl_to_lst_changed_names(caplog): + """ + Check that generated lstinfo file is as expected, when the contig names in the tbl file + differ from the contig names given in 'contigs' (tbl2lst called with changed_name=True). + The test tblfile contains the following aspects: + - gene in D strand (start < end) + - gene in C strand (start > end) + - CDS features (some with all info = ECnumber, gene name, product etc. 
; + some with missing info) + - tRNA type + - repeat_region type (*2) + - contigs with more than 2 genes + - contig with only 2 genes (both 'b' loc) + - contig with 1 gene ('b' loc) + - contig without gene (should be skipped) + """ + caplog.set_level(logging.DEBUG) + logger = logging.getLogger("test_prokka") + tblfile = os.path.join(TEST_ANNOTE, "prokka_out_tbl_changed-contnames.tbl") + lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst") + contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50", + "test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50", + "test.0417.00002.0007\t50"] + name = "test.0417.00002" + assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=True) + exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst") + assert tutil.compare_order_content(exp_lst, lstfile) + + +def test_tbl_to_lst_not_changed_names(caplog): """ Check that generated lstinfo file is as expected, when the genome name is the same as it already was in the genome given to prokka. @@ -66,13 +94,14 @@ def test_tbl_to_lst(caplog): - contig without gene (should be skipped) """ caplog.set_level(logging.DEBUG) + logger = logging.getLogger("test_prokka") tblfile = os.path.join(TEST_ANNOTE, "original_name.fna-prokkaRes", "prokka_out_for_test.tbl") lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst") contigs = ["test.0417.00002.0001\t50", "test.0417.00002.0002\t50", "test.0417.00002.0003\t50", "test.0417.00002.0004\t50", "test.0417.00002.0005\t50", "test.0417.00002.0006\t50", "test.0417.00002.0007\t50"] name = "test.0417.00002" - assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name) + assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, logger, changed_name=False) exp_lst = os.path.join(EXP_ANNOTE, "res_tbl2lst.lst") assert tutil.compare_order_content(exp_lst, lstfile) -- GitLab