diff --git a/PanACoTA/annotate_module/format_prokka.py b/PanACoTA/annotate_module/format_prokka.py index 3824e3dedd582e613cc6d7fb0907fa23473774a9..50abde437e4de7f1bf9eca8a7858938ad17aa02e 100644 --- a/PanACoTA/annotate_module/format_prokka.py +++ b/PanACoTA/annotate_module/format_prokka.py @@ -116,7 +116,7 @@ def format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, gene_dir, # Generate replicon file (same as input sequence but with gembase formatted headers). From # this file, get contig names, to be used to generate gff file - contigs, sizes = utils.get_genome_contigs_and_rename(name, gpath, res_rep_file) + contigs, sizes = utils.get_genome_contigs_and_rename(name, gpath, res_rep_file, logger) if not contigs: try: os.remove(res_rep_file) diff --git a/PanACoTA/utils.py b/PanACoTA/utils.py index 1a128496405004eb800427e9665ac0a59a6db369..2825a5e99dc25cc3c98f89aa81709239cf593ac0 100755 --- a/PanACoTA/utils.py +++ b/PanACoTA/utils.py @@ -697,7 +697,6 @@ def read_genomes(list_file, name, date, dbpath, tmp_path): "ignored when concatenating {}").format(file, genomes_inf)) # If there are files to concatenate, concatenate them if to_concat: - print(to_concat) genome_name = to_concat[0] + "-all.fna" concat_file = os.path.join(tmp_path, genome_name) to_concat = [os.path.join(dbpath, gname) for gname in to_concat] @@ -1110,7 +1109,7 @@ def check_out_dirs(resdir): sys.exit(1) -def get_genome_contigs_and_rename(gembase_name, gpath, outfile): +def get_genome_contigs_and_rename(gembase_name, gpath, outfile, logger): """ For the given genome (sequence in gpath), rename all its contigs with the new name: 'gembase_name', and save the output sequence in outfile. @@ -1165,15 +1164,16 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): # - write header ("<contig name> <size>") to replicon file if prev_cont: cont = "\t".join([prev_cont, str(cont_size)]) + "\n" - prevcont_nohead = "".join(prev_cont.split(">")[1:]) - prev_orig_name_nohead = "".join(prev_orig_name.split(">")[1:]) + prevcont_nohead = prev_cont.split(">")[1] + prev_orig_name_nohead = prev_orig_name.split(">")[1] if prev_orig_name_nohead: sizes[prevcont_nohead] = cont_size contigs[prev_orig_name_nohead] = prevcont_nohead grf.write(cont) grf.write(seq) prev_cont = ">" + gembase_name + "." + str(contig_num).zfill(4) - prev_orig_name = line.strip() + # keep only first string of contig + prev_orig_name = line.strip().split()[0] contig_num += 1 cont_size = 0 seq = "" @@ -1184,8 +1184,11 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): # Write last contig cont = "\t".join([prev_cont, str(cont_size)]) + "\n" prevcont_nohead = "".join(prev_cont.split(">")[1:]) - prev_orig_name_nohead = "".join(prev_orig_name.split(">")[1:]) + prev_orig_name_nohead = prev_orig_name.split(">")[1] if prev_orig_name_nohead: + if prev_orig_name_nohead in contigs: + logger.error(f"several contigs have the same name {prev_cont} in {gpath}.") + return False, False contigs[prev_orig_name_nohead] = prevcont_nohead sizes[prevcont_nohead] = cont_size grf.write(cont) diff --git a/test/data/annotate/test_files/list_genomes-func-test-default.txt b/test/data/annotate/test_files/list_genomes-func-test-default.txt index 70900f0cd5b0da065fbffbd9ef4686e7f925f0cd..b83a1a30e1236c17b1fd05d5b8a71e2e756edd29 100755 --- a/test/data/annotate/test_files/list_genomes-func-test-default.txt +++ b/test/data/annotate/test_files/list_genomes-func-test-default.txt @@ -1,3 +1,3 @@ B2_A3_5.fasta-changeName.fna::ESCO.1116 H299_H561.fasta genome6.fasta :: ESCO.1015 -A_H738.fasta toto.fst genome1.fasta genome.fst::.1015 +A_H738.fasta genome1.fasta ::.1015 diff --git a/test/test_functional/test_annote.py b/test/test_functional/test_annote.py index 73bb14ac53f39e083e46fe6908d11df18e94a45b..ecbe278f3bb2bcfcf7e0a7c74774383d56c04bc0 100755 --- a/test/test_functional/test_annote.py +++ b/test/test_functional/test_annote.py @@ -5,8 +5,8 @@ Functional tests for genomeAPCAT annotate """ -from genomeAPCAT.subcommands import annote as annot -import genomeAPCAT.utils as utils +from PanACoTA.subcommands import annotate as annot +import PanACoTA.utils as utils import pytest import os @@ -18,23 +18,34 @@ import matplotlib matplotlib.use('AGG') -LOGFILE_BASE = "test_main_from_parse" +# LOGFILE_BASE = "test_main_from_parse" +# Define variables used by several tests +DBDIR = os.path.join("test", "data", "annotate") +GEN_PATH = os.path.join(DBDIR, "genomes") +EXP_DIR = os.path.join(DBDIR, 'exp_files') +TEST_DIR = os.path.join(DBDIR, 'test_files') +GENEPATH = os.path.join(DBDIR, "generated_by_unit-tests") -def setup_module(): +@pytest.fixture(autouse=True) +def setup_teardown_module(): """ - create logger at start of this test module - """ - utils.init_logger(LOGFILE_BASE, 0, '', verbose=1) + Remove log files at the end of this test module + Before each test: + - init logger + - create directory to put generated files -def teardown_module(): - """ - Remove log files at the end of this test module + After: + - remove all log files + - remove directory with generated results """ - os.remove(LOGFILE_BASE + ".log") - os.remove(LOGFILE_BASE + ".log.details") - os.remove(LOGFILE_BASE + ".log.err") + os.mkdir(GENEPATH) + print("setup") + + yield + # shutil.rmtree(GENEPATH) + print("teardown") def test_main_from_parse(): @@ -42,18 +53,15 @@ def test_main_from_parse(): Test that when a tmp folder is given by user, tmp files are saved in it, and prokka files too. """ - list_file = os.path.join("test", "data", "annotate", "test_files", - "list_genomes-func-test-default.txt") - dbpath = os.path.join("test", "data", "annotate", "genomes") - resdir = os.path.join("test", "data", "annotate", "res_test_funcfromparse") - tmpdir = os.path.join("test", "data", "annotate", "tmp_funcGivenTmp") + list_file = os.path.join(TEST_DIR, "list_genomes-func-test-default.txt") + tmpdir = os.path.join(GENEPATH, "tmp_funcGivenTmp") name = "ESCO" - l90 = 1 + l90 = 100 date = "0417" args = argparse.Namespace() args.list_file = list_file - args.db_path = dbpath - args.res_path = resdir + args.db_path = GEN_PATH + args.res_path = GENEPATH args.name = name args.date = date args.l90 = l90 @@ -66,13 +74,16 @@ def test_main_from_parse(): args.prokkadir = None args.verbose = False args.quiet = False + args.from_info = False + args.prodigal_only = False + args.small = False + args.annotdir = False + args.argv = ["annotate", "test_annote.py", "test_main_from_parse"] annot.main_from_parse(args) # Check that tmp files exist in the right folder - assert os.path.isfile(os.path.join(tmpdir, "A_H738.fasta-all.fna-short-contig.fna")) + assert os.path.isfile(os.path.join(tmpdir, "A_H738.fasta-all.fna")) # Test that prokka folder is in the right directory - assert os.path.isdir(os.path.join(tmpdir, "A_H738.fasta-all.fna-short-contig.fna-prokkaRes")) - shutil.rmtree(tmpdir, ignore_errors=True) - shutil.rmtree(resdir, ignore_errors=True) + assert os.path.isdir(os.path.join(tmpdir, "A_H738.fasta-all.fna-prokkaRes")) def test_main_novalid_genome(capsys):