From 46880c6e0a7a25fe9b271b2e5ec39cfe042e19e9 Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Mon, 12 Oct 2020 16:46:12 +0200 Subject: [PATCH] fix prokka contig names Original contig names are renamed by prokka. So, to generate fna, lst, etc., we need to rely on the contig names in prokkaRes/*.fna and not on those of the sequence given to prokka --- PanACoTA/annotate_module/format_prokka.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/PanACoTA/annotate_module/format_prokka.py b/PanACoTA/annotate_module/format_prokka.py index 33ec3204..ec9e4fd4 100644 --- a/PanACoTA/annotate_module/format_prokka.py +++ b/PanACoTA/annotate_module/format_prokka.py @@ -101,6 +101,7 @@ def format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, gene_dir, """ prokka_dir = os.path.join(prok_path, os.path.basename(gpath) + "-prokkaRes") # Get needed Prokka result files + fna_file = glob.glob(os.path.join(prokka_dir, "*.fna"))[0] prokka_tbl_file = glob.glob(os.path.join(prokka_dir, "*.tbl"))[0] prokka_gff_file = glob.glob(os.path.join(prokka_dir, "*.gff"))[0] prokka_ffn_file = glob.glob(os.path.join(prokka_dir, "*.ffn"))[0] @@ -116,7 +117,7 @@ def format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, gene_dir, # Generate replicon file (same as input sequence but with gembase formatted headers). 
From # this file, get contig names, to be used to generate gff file - contigs, sizes = utils.get_genome_contigs_and_rename(name, gpath, res_rep_file, logger) + contigs, sizes = utils.get_genome_contigs_and_rename(name, fna_file, res_rep_file, logger) if not contigs: try: os.remove(res_rep_file) @@ -130,7 +131,7 @@ def format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, gene_dir, return False # Convert prokka tbl file to gembase .lst file format - ok_tbl = tbl2lst(prokka_tbl_file, res_lst_file, contigs, name, gpath) + ok_tbl = tbl2lst(prokka_tbl_file, res_lst_file, contigs, name, fna_file) if not ok_tbl: try: os.remove(res_rep_file) @@ -143,7 +144,7 @@ def format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, gene_dir, logger.error("Problems while generating LSTINFO file for {}".format(name)) return False # Create gff3 file for annotations - ok_gff = generate_gff(gpath, prokka_gff_file, res_gff_file, res_lst_file, sizes, contigs) + ok_gff = generate_gff(fna_file, prokka_gff_file, res_gff_file, res_lst_file, sizes, contigs) if not ok_gff: try: os.remove(res_rep_file) -- GitLab