diff --git a/test/test_unit/test_annotate/test_format_func.py b/test/test_unit/test_annotate/test_format_func.py index e61f866dfcb2a8a320d4cb0beb8ab1b3925f823e..0a79462f5f2a00fe73e3e8637645e8d1d9cbcac6 100755 --- a/test/test_unit/test_annotate/test_format_func.py +++ b/test/test_unit/test_annotate/test_format_func.py @@ -63,13 +63,12 @@ def my_logger(): def test_write_gene(): """ Test that lstinfo line is written as expected when writing info for - a gene (CDS). Also check that crispr number is not changed + a gene (CDS). """ gtype = "CDS" locus_num = "5621221" gene_name = "abc" product = "new product" - crispr_num = 1 cont_loc = "i" genome = "ESCO.0216.00005" cont_num = 15 @@ -81,11 +80,10 @@ def test_write_gene(): end = str(656) lstfile = os.path.join(GENEPATH, "toto.lst") lstopenfile = open(lstfile, "w") - crispres, lst_line = ffunc.write_gene(gtype, locus_num, gene_name, product, crispr_num, - cont_loc, genome, cont_num, ecnum, inf2, db_xref, strand, - start, end, lstopenfile) + lst_line = ffunc.write_gene(gtype, locus_num, gene_name, product, + cont_loc, genome, cont_num, ecnum, inf2, db_xref, strand, + start, end, lstopenfile) lstopenfile.close() - assert crispres == crispr_num assert lst_line == ("154\t656\tC\tCDS\tESCO.0216.00005.0015i_5621221\tabc\t| new product " "| 454.12.5 | more information... dfd _ with _ pipe_characters... | " "mydb_pipe") @@ -93,39 +91,6 @@ def test_write_gene(): assert tutil.compare_order_content(exp_file, lstfile) -def test_write_crispr(): - """ - Test that lstinfo line is written as expected when writing info for CRISPR, - and that crispr num increased by 1 - """ - gtype = "repeat_region" - locus_num = "465" - gene_name = "NA" - product = "NA" - crispr_num = 1 - cont_loc = "b" - genome = "ESCO.0216.00005" - cont_num = 15 - ecnum = "NA" - inf2 = "more information... dfd | with | pipe|characters..." - db_xref = "mydb|pipe" - strand = "D" - start = str(154) - end = str(656) - lstfile = os.path.join(GENEPATH, "toto.lst") - lstopenfile = open(lstfile, "w") - crispres, lstline = ffunc.write_gene(gtype, locus_num, gene_name, product, crispr_num, - cont_loc, genome, cont_num, ecnum, inf2, db_xref, strand, - start, end, lstopenfile) - lstopenfile.close() - exp_file = os.path.join(EXP_ANNOTE, "res_test_write_geneCRISPR.lst") - assert crispres == 2 - assert lstline == ("154\t656\tD\tCRISPR\tESCO.0216.00005.0015b_CRISPR1\tcrispr\t| " - "crispr-array | NA | more information... dfd _ with _ pipe_characters... | " - "mydb_pipe") - assert tutil.compare_order_content(exp_file, lstfile) - - def test_contig_name(): """ test that when we give a genome name and a contig number, it returns the expected fasta header @@ -170,865 +135,178 @@ def test_write_header_gene_no_name(): outfile.close() -def test_write_header_crispr(): +def test_handle_genome_badprok(): """ - From a given line of lstinfo file, giving info for a CRISPR check that the header - line of the protein and gene files are generated as expected. + Test that when we try to format a genome which was annotated by prokka, but original genome + is empty -> cannot format genome -> returns False associated with genome name """ - outfile = StringIO() - lstline = ("296902\t2968265\tC\tCRISPR\ttest.0417.00002.0003b_CRISPR1\tcrispr\t| " - "crispr-array | NA | NA") - ffunc.write_header(lstline, outfile) - res = outfile.getvalue() - exp = ">test.0417.00002.0003b_CRISPR1 2671364 crispr | crispr-array | NA | NA\n" - assert res == exp - outfile.close() - - -def test_handle_genome_nores(): + # Create prokka output dir and pur expected files (empty, we want to generate an error) + gpath = os.path.join(GENEPATH, "toto.fasta") + open(gpath, "w").close() + prok_path = gpath + "-prokkaRes" + os.makedirs(prok_path) + tbl_res = os.path.join(prok_path, " toto.tbl") + gff_res = os.path.join(prok_path, "toto.gff") + ffn_res = os.path.join(prok_path, "toto.ffn") + faa_res = os.path.join(prok_path, "toto.faa") + for file in [tbl_res, gff_res, ffn_res, faa_res]: + open(file, "w").close() + # Create output directory for .fna file + rep_dir = os.path.join(GENEPATH, "Replicons") + os.makedirs(rep_dir) + # Get args for function + args = ("toto.fasta", "name", gpath, GENEPATH, "lst/dir", "prot/dir", + "gene/dir", rep_dir, "gff/dir", False, my_logger()[0]) + ok_format, genome = ffunc.handle_genome(args) + assert ok_format == False + assert genome == "toto.fasta" + + +def test_handle_genome_badprodigal(): """ - Test that when we try to format a genome which is not in results, - it returns a tuple with "no_res" and the genome name. + Test that when we try to format a genome which was annotated by prokka, but original genome + is empty -> cannot format genome -> returns False associated with genome name """ - results = {"abcd.fasta": True} - prodigal_only = False - args = ("toto.fasta", "name", "genome/path", "prokka/path", "lst/dir", "prot/dir", - "gene/dir", "rep/dir", "gff/dir", prodigal_only, my_logger()[0]) - res = ffunc.handle_genome(args) - assert res == ("no_res", "toto.fasta") - - -# def test_handle_genome_badprok(): -# """ -# Test that when we try to format a genome which is in results, but with False, -# it returns a tuple with "bad_prokka" and the genome name. -# """ -# results = {"abcd.fasta": True, "toto.fasta": False} -# args = ("toto.fasta", "name", "genome/path", "prokka/path", "lst/dir", "prot/dir", -# "gene/dir", "rep/dir", "gff/dir", False, my_logger()[0]) -# res = ffunc.handle_genome(args) -# assert res == ("bad_prokka", "toto.fasta") - - -# def test_handle_genome_formatok(): -# """ -# Test that when we try to format a genome which is in results, with True, -# it returns a tuple with "True" and the genome name. -# """ -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-split5N.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# results = {"B2_A3_5.fasta-split5N.fna-short-contig.fna": True, "toto.fasta": False} -# args = ("B2_A3_5.fasta-split5N.fna-short-contig.fna", name, gpath, prok_path, -# lst_dir, prot_dir, -# gene_dir, rep_dir, gff_dir, results, my_logger()[0]) -# res = ffunc.handle_genome(args) -# assert res == (True, "B2_A3_5.fasta-split5N.fna-short-contig.fna") -# os.remove(os.path.join(lst_dir, name + ".prt")) -# os.remove(os.path.join(lst_dir, name + ".fna")) -# os.remove(os.path.join(lst_dir, name + ".gen")) -# os.remove(os.path.join(lst_dir, name + ".lst")) -# os.remove(os.path.join(lst_dir, name + ".gff")) - - -# def test_handle_genome_formaterror(): -# """ -# Test that when we try to format a genome which is in results, but with False, -# it returns a tuple with "bad_prokka" and the genome name. -# """ -# logger = my_logger() -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-problems.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# tbl_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# tblout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# shutil.copyfile(tbl_init, tblout) -# gff_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# gffout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# shutil.copyfile(gff_init, gffout) -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# results = {"B2_A3_5.fasta-problems.fna-short-contig.fna": True, "toto.fasta": False} -# args = ("B2_A3_5.fasta-problems.fna-short-contig.fna", name, gpath, -# prok_path, lst_dir, prot_dir, gene_dir, rep_dir, gff_dir, results, logger[0]) -# res = ffunc.handle_genome(args) -# assert res == (False, "B2_A3_5.fasta-problems.fna-short-contig.fna") -# msg = ("Unknown header format >EPKOMDHM_i00002 hypothetical protein in " -# "test/data/annotate/exp_files/B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes/" -# "test.0417.00002.ffn.\n" -# "Gen file will not be created.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg -# # remove tblout which was copied for this test -# assert not os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# os.remove(tblout) -# os.remove(gffout) - - -# def test_tbl_to_lst_new_name(): -# """ -# Check that generated lstinfo file is as expected, when the genome name has changed between -# the one given to prokka, and the name given now. -# The test tblfile contains the following aspects: -# - gene in D strand (start < end) -# - gene in C strand (start > end) -# - CDS features (some with all info = ECnumber, gene name, product etc. ; -# some with missing info) -# - tRNA type -# - repeat_region type (*2) -# - contigs with more than 2 genes -# - contig with only 2 genes (both 'b' loc) -# - contig with 1 gene ('b' loc) -# - contig without gene (should be skipped) -# """ -# tblfile = os.path.join("test", "data", "annotate", "test_files", "original_name.fna-prokkaRes", -# "prokka_out_for_test.tbl") -# lstfile = os.path.join("test", "data", "annotate", "test_tbl2lstNewName.lst") -# exp_lst = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst-newName.lst") -# name = "test.0417.00010" -# assert not ffunc.tbl2lst(tblfile, lstfile, name) -# with open(exp_lst, "r") as expf, open(lstfile, "r") as lstf: -# for line_exp, line_out in zip(expf, lstf): -# assert line_exp == line_out -# os.remove(lstfile) - - - - - - - -# def test_create_prt_wrong_header_sep(): -# """ -# Test that, when creating prt file from faa and lst, if a header of faa file is -# not in the right format (protein name and number are not separated by '_'), -# it writes an error, erases prt file, and returns False. -# """ -# logger = my_logger() -# faaseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-wrongHeaderSep.faa") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_prt-wrongHeadSep.prt") -# assert not ffunc.create_prt(faaseq, lstfile, prtseq, logger[1]) -# assert not os.path.isfile(prtseq) -# msg = ("Unknown header format >JGIKIPIJ00008 in test/data/annotate/test_files/" -# "prokka_out_for_test-wrongHeaderSep.faa. Error: invalid literal for int() " -# "with base 10: '>JGIKIPIJ00008'\nPrt file not created from " -# "test/data/annotate/test_files/prokka_out_for_test-wrongHeaderSep.faa.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_prt_wrong_header_int(): -# """ -# Test that, when creating prt file from faa and lst, if a header of faa file is -# not in the right format (protein name and number are separated by '_', but protein num -# contains a letter), it writes an error, erases prt file, and returns False. -# """ -# logger = my_logger() -# faaseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-wrongHeaderInt.faa") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_prt-wrongHeadInt.prt") -# assert not ffunc.create_prt(faaseq, lstfile, prtseq, logger[1]) -# assert not os.path.isfile(prtseq) -# msg = ("Unknown header format >JGIKIPIJ_d0008 in test/data/annotate/test_files/" -# "prokka_out_for_test-wrongHeaderInt.faa. Error: invalid literal for int() " -# "with base 10: 'd0008'\nPrt file not created from " -# "test/data/annotate/test_files/prokka_out_for_test-wrongHeaderInt.faa.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_prt_miss_lst(): -# """ -# Test that, when creating prt file from faa and lst, if a protein of faa file is not present in -# the lst file, it writes an error, removes the prt file, and returns False. -# """ -# logger = my_logger() -# faaseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-supHeader.faa") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_prt-missLst.prt") -# assert not ffunc.create_prt(faaseq, lstfile, prtseq, logger[1]) -# assert not os.path.isfile(prtseq) -# msg = ("Missing info for protein >sup-prot_00012 in " -# "test/data/annotate/exp_files/res_tbl2lst.lst. If it is " -# "actually present in the lst file, check that proteins are ordered by " -# "increasing number in both lst and faa files.\n" -# "Prt file not created from test/data/annotate/test_files/" -# "prokka_out_for_test-supHeader.faa.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_prt_wrong_order(): -# """ -# Test that, when creating prt file from faa and lst, if a protein of faa file is not in -# increasing protein number, so that it does not correspond to the protein in the lstinfo file, -# it writes an error, removes the prt file, and returns False. -# """ -# logger = my_logger() -# faaseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-wrongOrder.faa") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_prt-wrongOrder.prt") -# assert not ffunc.create_prt(faaseq, lstfile, prtseq, logger[1]) -# assert not os.path.isfile(prtseq) -# msg = ("Missing info for protein >appears_after_13_00011 in " -# "test/data/annotate/exp_files/res_tbl2lst.lst. If it is " -# "actually present in the lst file, check that proteins are ordered by " -# "increasing number in both lst and faa files.\n" -# "Prt file not created from test/data/annotate/test_files/" -# "prokka_out_for_test-wrongOrder.faa.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_prt_ok(): -# """ -# Test that when everything is ok in both faa and lst files, the prt file is -# created as expected. -# """ -# logger = my_logger() -# faaseq = os.path.join("test", "data", "annotate", "test_files", "original_name.fna-prokkaRes", -# "prokka_out_for_test.faa") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_prt.prt") -# assert ffunc.create_prt(faaseq, lstfile, prtseq, logger[1]) -# exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_create_prt.faa") -# with open(exp_file, "r") as expf, open(prtseq, "r") as prtf: -# for line_exp, line_out in zip(expf, prtf): -# assert line_exp == line_out -# os.remove(prtseq) - - -# def test_create_gen_sup_crispr(): -# """ -# Test that when there is a CRISPR in the ffn file, but not in lstinfo, -# it generates an error, because the CRISPR ID does not correspond to the gene ID in lstinfo. -# It should return False, write an error message, and remove the .gen file. -# Moreover, the CRISPR ID is not in the same format as a gene ID, so the error should -# be on the format. -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-supCRISPR.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_supCRISPR.gen") -# assert not ffunc.create_gen(ffnseq, lstfile, genseq, logger[1]) -# assert not os.path.isfile(genseq) -# msg = ("Unknown header format >prokka_out_for_test in test/data/annotate/test_files/" -# "prokka_out_for_test-supCRISPR.ffn.\nGen file will not be created.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_sup_gene(): -# """ -# Test that, when creating gen file from ffn and lst, if a gene of ffn file is not present in -# the lst file, it writes an error, removes the gen file, and returns False. -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-supGene.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# prtseq = os.path.join("test", "data", "annotate", "test_create_gen-supgene.prt") -# assert not ffunc.create_gen(ffnseq, lstfile, prtseq, logger[1]) -# assert not os.path.isfile(prtseq) -# msg = ("Missing info for gene >sup_gene_00012 in test/data/annotate/exp_files/" -# "res_tbl2lst.lst. If it is actually present " -# "in the lst file, check that genes are ordered by increasing " -# "number in both lst and ffn files.\nGen file not created" -# " from test/data/annotate/test_files/prokka_out_for_test-supGene.ffn.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_miss_crispr(): -# """ -# Test for situation where there are 2 CRISPRs in the lstinfo file, the first one -# is not in the ffn file, while the second one is in the ffn file. -# It should return an error message about CRISPR number not corresponding, as -# the first CRISPR found in ffn (CRISPR 1) corresponds to CRISPR2 in lstinfo file. -# Gene file should be removed, and the function should return False -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-missCRISPR.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_missCRISPR.gen") -# assert not ffunc.create_gen(ffnseq, lstfile, genseq, logger[1]) -# assert not os.path.isfile(genseq) -# msg = ("Problem with CRISPR numbers in test/data/annotate/exp_files/res_tbl2lst.lst. " -# "CRISPR >prokka_out_for_test in ffn is CRISPR num 1, whereas it is annotated " -# "as CRISPR num 2 in lst file.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_no_crispr_ffn(): -# """ -# Test that when the there is a CRISPR in the lst file, but not in the ffn file, -# everything goes well, as in some versions of prokka (1.12)n CRISPRs are not in ffn while -# they are specified in lst. Function should return True, and gene file created. -# """ -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-noCRISPRffn.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_noCRISPRffn.gen") -# assert ffunc.create_gen(ffnseq, lstfile, genseq, my_logger()[1]) -# assert os.path.isfile(genseq) -# exp_file = os.path.join("test", "data", "annotate", "exp_files", -# "res_create_gen_noCRISPRffn.gen") -# with open(exp_file, "r") as expf, open(genseq, "r") as prtf: -# for line_exp, line_out in zip(expf, prtf): -# assert line_exp == line_out -# os.remove(genseq) - - -# def test_create_gen_wrong_header_sep(): -# """ -# Test that, when creating gen file from ffn and lst, if a header of ffn file is -# not in the right format (gene name and number are not separated by '_'), -# it writes an error, erases gen file, and returns False. -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-wrongFormat.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_wrongHeadSep.gen") -# assert not ffunc.create_gen(ffnseq, lstfile, genseq, logger[1]) -# assert not os.path.isfile(genseq) -# msg = ("Unknown header format >JGIKIPIJ-00005 in test/data/annotate/test_files/" -# "prokka_out_for_test-wrongFormat.ffn.\n" -# "Gen file will not be created.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_wrong_header_int(): -# """ -# Test that, when creating gen file from ffn and lst, if a header of ffn file is -# not in the right format (gene name and number are separated by '_', but gene num -# contains a letter), it writes an error, erases gen file, and returns False. -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", -# "prokka_out_for_test-wrongInt.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_wrongHeadInt.gen") -# assert not ffunc.create_gen(ffnseq, lstfile, genseq, logger[1]) -# assert not os.path.isfile(genseq) -# msg = ("Unknown header format >JGIKIPIJ_a00005 in test/data/annotate/test_files/" -# "prokka_out_for_test-wrongInt.ffn.\n" -# "Gen file will not be created.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_wrong_lst_int(): -# """ -# Test that, when creating gen file from ffn and lst, if a gene name in lst file is -# not in the right format (gene name and number are separated by '_', but gene num -# contains a letter), it writes an error, erases gen file, and returns False. -# Because the gene name in ffn won't be found in lst (a it contains an error in lst). -# """ -# logger = my_logger() -# ffnseq = os.path.join("test", "data", "annotate", "test_files", "original_name.fna-prokkaRes", -# "prokka_out_for_test.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst-wrongGeneName.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen_wrongLstHeadInt.gen") -# assert not ffunc.create_gen(ffnseq, lstfile, genseq, logger[1]) -# assert not os.path.isfile(genseq) -# msg = ("Missing info for gene >JGIKIPIJ_00009 in test/data/annotate/exp_files/" -# "res_tbl2lst-wrongGeneName.lst. If it is actually present " -# "in the lst file, check that genes are ordered by increasing " -# "number in both lst and ffn files.\nGen file not created " -# "from test/data/annotate/test_files/original_name.fna-prokkaRes/" -# "prokka_out_for_test.ffn.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg - - -# def test_create_gen_ok(): -# """ -# Test that when everything is ok in both ffn and lst files, the gen file is -# created as expected. -# """ -# faaseq = os.path.join("test", "data", "annotate", "test_files", "original_name.fna-prokkaRes", -# "prokka_out_for_test.ffn") -# lstfile = os.path.join("test", "data", "annotate", "exp_files", "res_tbl2lst.lst") -# genseq = os.path.join("test", "data", "annotate", "test_create_gen.gen") -# assert ffunc.create_gen(faaseq, lstfile, genseq, my_logger()[1]) -# exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_create_gen.gen") -# with open(exp_file, "r") as expf, open(genseq, "r") as prtf: -# for line_exp, line_out in zip(expf, prtf): -# assert line_exp == line_out -# os.remove(genseq) - - -# def test_handle_line_gff(): -# """ -# Check that given a line in lstinfo with no information (gene name, ecnumber, product, -# inference), it returns the good gff line -# """ -# line_lst = ("201\t743\tC\tCDS\tESCO.1015.00001.b0001_00001\tNA\t" -# "| NA | NA | NA") -# line_gff = ("H561_S27_L001__1\tProdigal:2.6\tCDS\t201\t743\t.\t-\t0\t" -# "ID=ONKACNIE_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ONKACNIE_00001;product=hypothetical protein") -# outgff = "test_handle_line_gff.gff" -# gfff = open(outgff, "w") -# ffunc.handle_line_gff(line_lst, line_gff, gfff) -# gfff.close() -# with open(outgff, "r") as gf: -# lines = gf.readlines() -# assert len(lines) == 1 -# exp_line = ("ESCO.1015.00001.0001\tProdigal:2.6\tCDS\t201\t743\t.\t-\t.\t" -# "ID=ESCO.1015.00001.b0001_00001;locus_tag=ESCO.1015.00001.b0001_00001\n") -# assert lines[0] == exp_line -# os.remove(outgff) - - -# def test_handle_line_gff_ecnum(): -# """ -# Check that given a line in lstinfo with only ec_number information, -# it returns the good gff line -# """ -# line_lst = ("201\t743\tC\tCDS\tESCO.1015.00001.b0001_00001\tNA\t" -# "| NA | 123.995.4546 | NA") -# line_gff = ("H561_S27_L001__1\tProdigal:2.6\tCDS\t201\t743\t.\t-\t0\t" -# "ID=ONKACNIE_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ONKACNIE_00001;product=hypothetical protein") -# outgff = "test_handle_line_gff-ecnum.gff" -# gfff = open(outgff, "w") -# ffunc.handle_line_gff(line_lst, line_gff, gfff) -# gfff.close() -# with open(outgff, "r") as gf: -# lines = gf.readlines() -# assert len(lines) == 1 -# exp_line = ("ESCO.1015.00001.0001\tProdigal:2.6\tCDS\t201\t743\t.\t-\t.\t" -# "ID=ESCO.1015.00001.b0001_00001;eC_number=123.995.4546;" -# "locus_tag=ESCO.1015.00001.b0001_00001\n") -# assert lines[0] == exp_line -# os.remove(outgff) - - -# def test_handle_line_gff_gene(): -# """ -# Check that given a line in lstinfo with only gene name information, -# it returns the good gff line -# """ -# line_lst = ("201\t743\tC\tCDS\tESCO.1015.00001.b0001_00001\tge4a\t" -# "| NA | NA | NA") -# line_gff = ("H561_S27_L001__1\tProdigal:2.6\tCDS\t201\t743\t.\t-\t0\t" -# "ID=ONKACNIE_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ONKACNIE_00001;product=hypothetical protein") -# outgff = "test_handle_line_gff-gene.gff" -# gfff = open(outgff, "w") -# ffunc.handle_line_gff(line_lst, line_gff, gfff) -# gfff.close() -# with open(outgff, "r") as gf: -# lines = gf.readlines() -# assert len(lines) == 1 -# exp_line = ("ESCO.1015.00001.0001\tProdigal:2.6\tCDS\t201\t743\t.\t-\t.\t" -# "ID=ESCO.1015.00001.b0001_00001;Name=ge4a;gene=ge4a;" -# "locus_tag=ESCO.1015.00001.b0001_00001\n") -# assert lines[0] == exp_line -# os.remove(outgff) - - -# def test_handle_line_gff_inf(): -# """ -# Check that given a line in lstinfo with only inference information, -# it returns the good gff line -# """ -# line_lst = ("201\t743\tC\tCDS\tESCO.1015.00001.b0001_00001\tNA\t" -# "| NA | NA | ab initio prediction:Prodigal:2.6") -# line_gff = ("H561_S27_L001__1\tProdigal:2.6\tCDS\t201\t743\t.\t-\t0\t" -# "ID=ONKACNIE_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ONKACNIE_00001;product=hypothetical protein") -# outgff = "test_handle_line_gff-inf.gff" -# gfff = open(outgff, "w") -# ffunc.handle_line_gff(line_lst, line_gff, gfff) -# gfff.close() -# with open(outgff, "r") as gf: -# lines = gf.readlines() -# assert len(lines) == 1 -# exp_line = ("ESCO.1015.00001.0001\tProdigal:2.6\tCDS\t201\t743\t.\t-\t.\t" -# "ID=ESCO.1015.00001.b0001_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ESCO.1015.00001.b0001_00001\n") -# assert lines[0] == exp_line -# os.remove(outgff) - - -# def test_handle_line_gff_prod(): -# """ -# Check that given a line in lstinfo with only product information, -# it returns the good gff line -# """ -# line_lst = ("201\t743\tC\tCDS\tESCO.1015.00001.b0001_00001\tNA\t" -# "| hypothetical protein | NA | NA") -# line_gff = ("H561_S27_L001__1\tProdigal:2.6\tCDS\t201\t743\t.\t-\t0\t" -# "ID=ONKACNIE_00001;inference=ab initio " -# "prediction:Prodigal:2.6;locus_tag=ONKACNIE_00001;product=hypothetical protein") -# outgff = "test_handle_line_gff-prod.gff" -# gfff = open(outgff, "w") -# ffunc.handle_line_gff(line_lst, line_gff, gfff) -# gfff.close() -# with open(outgff, "r") as gf: -# lines = gf.readlines() -# assert len(lines) == 1 -# exp_line = ("ESCO.1015.00001.0001\tProdigal:2.6\tCDS\t201\t743\t.\t-\t.\t" -# "ID=ESCO.1015.00001.b0001_00001;locus_tag=ESCO.1015.00001.b0001_00001;" -# "product=hypothetical protein\n") -# assert lines[0] == exp_line -# os.remove(outgff) - - -# def test_generate_gff(): -# """ -# Test creating gff file. -# """ -# prokgff = os.path.join("test", "data", "annotate", "test_files", "prokka_out_gff.gff") -# gffout = os.path.join("test", "data", "annotate", "test_creategff.gff") -# lstgenome = os.path.join("test", "data", "annotate", "test_files", "lstinfo_for_gff.lst") -# assert ffunc.generate_gff(prokgff, gffout, lstgenome, my_logger()[1]) -# exp_gff = os.path.join("test", "data", "annotate", "exp_files", "res_create_gff.gff") -# with open(gffout, "r") as gffo, open(exp_gff, "r") as expf: -# for line_exp, line_out in zip(expf, gffo): -# assert line_exp == line_out -# os.remove(gffout) - - -# def test_generate_gff_error(): -# """ -# Test creating gff file. -# """ -# logger = my_logger() -# prokgff = os.path.join("test", "data", "annotate", "test_files", "prokka_out_gff-error.gff") -# lstgenome = os.path.join("test", "data", "annotate", "test_files", "lstinfo_for_gff.lst") -# gffout = os.path.join("test", "data", "annotate", "test_creategff.gff") -# assert not ffunc.generate_gff(prokgff, gffout, lstgenome, logger[1]) -# os.remove(gffout) -# q = logger[0] -# assert q.qsize() == 1 -# logfound = q.get() -# msg = ("Problem with your gff file. '##FASTA' is not a gff entry line, whereas it should " -# "correspond to '863\t1795\tD\tCDS\tESCO.1015.00001.b0003_00016\tNA\t" -# "| hypothetical protein | NA | NA'") -# assert msg in logfound.message -# assert logfound.levelname == "ERROR" - - - - -# def test_format1genome(): -# """ -# Test that formatting a genome (making .prt, .gen, .fna, .lst) works, with a genome -# which did not change name between prokka run and format step. -# """ -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-split5N.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# assert ffunc.format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, -# gene_dir, rep_dir, gff_dir, my_logger()[1]) -# # Check that all files were created -# assert os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# # Check the contents of the files -# explst = os.path.join(prok_path, "res_format-B2.lst") -# expprt = os.path.join(prok_path, "res_format-B2.prt") -# expgen = os.path.join(prok_path, "res_format-B2.gen") -# expgff = os.path.join(prok_path, "res_format-B2.gff") -# with open(explst, "r") as lstf, open(os.path.join(lst_dir, name + ".lst"), "r") as lsto: -# for line_exp, line_out in zip(lstf, lsto): -# assert line_exp == line_out -# with open(expprt, "r") as expf, open(os.path.join(lst_dir, name + ".prt"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(expgen, "r") as expf, open(os.path.join(lst_dir, name + ".gen"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(gpath, "r") as expf, open(os.path.join(lst_dir, name + ".fna"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(expgff, "r") as expf, open(os.path.join(lst_dir, name + ".gff"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# os.remove(os.path.join(lst_dir, name + ".lst")) -# os.remove(os.path.join(lst_dir, name + ".prt")) -# os.remove(os.path.join(lst_dir, name + ".fna")) -# os.remove(os.path.join(lst_dir, name + ".gen")) -# os.remove(os.path.join(lst_dir, name + ".gff")) - - -# def test_format1genome_change_head(): -# """ -# Test that formatting a genome (making .prt, .gen, .fna, .lst) works, with a genome -# which changed its name between prokka and format step. -# """ -# ginit = os.path.join("test", "data", "annotate", "genomes", "B2_A3_5.fasta-changeName.fna") -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-changeName.fna-short-contig.fna") -# shutil.copyfile(ginit, gpath) -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# assert ffunc.format_one_genome(gpath, name, prok_path, lst_dir, -# prot_dir, gene_dir, rep_dir, gff_dir, my_logger()[1]) -# # Check that all files were created -# assert os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# # Check the contents of the files -# explst = os.path.join(prok_path, "res_format-B2.lst") -# expprt = os.path.join(prok_path, "res_format-B2.prt") -# expgen = os.path.join(prok_path, "res_format-B2.gen") -# expgff = os.path.join(prok_path, "res_format-B2.gff") -# expreplicons = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-split5N.fna-short-contig.fna") -# with open(explst, "r") as lstf, open(os.path.join(lst_dir, name + ".lst"), "r") as lsto: -# for line_exp, line_out in zip(lstf, lsto): -# assert line_exp == line_out -# with open(expprt, "r") as expf, open(os.path.join(lst_dir, name + ".prt"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(expgen, "r") as expf, open(os.path.join(lst_dir, name + ".gen"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(expgff, "r") as expf, open(os.path.join(lst_dir, name + ".gff"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# with open(expreplicons, "r") as expf, open(os.path.join(lst_dir, name + ".fna"), "r") as outf: -# for line_exp, line_out in zip(expf, outf): -# assert line_exp == line_out -# os.remove(os.path.join(lst_dir, name + ".lst")) -# os.remove(os.path.join(lst_dir, name + ".prt")) -# os.remove(os.path.join(lst_dir, name + ".fna")) -# os.remove(os.path.join(lst_dir, name + ".gen")) -# os.remove(os.path.join(lst_dir, name + ".gff")) -# os.remove(gpath) - - -# def test_format1genome_problemgen(): -# """ -# Test that formatting a genome (making .prt, .gen, .fna, .lst) returns an error message -# and does not create any output file if there is a problem while converting the -# .ffn to .gen -# """ -# logger = my_logger() -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-problems.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# tbl_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# tblout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# shutil.copyfile(tbl_init, tblout) -# gff_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# gffout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# shutil.copyfile(gff_init, gffout) -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# assert not ffunc.format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, -# gene_dir, rep_dir, gff_dir, logger[1]) -# # Check that all files were not created -# assert not os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# msg = ("Unknown header format >EPKOMDHM_i00002 hypothetical protein in " -# "test/data/annotate/exp_files/B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes/" -# "test.0417.00002.ffn.\n" -# "Gen file will not be created.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg -# # remove tblout which was copied for this test -# os.remove(tblout) -# os.remove(gffout) - - -# def test_format1genome_problemprt(): -# """ -# Test that formatting a genome (making .prt, .gen, .fna, .lst) works, with a genome -# which did not change name between prokka run and format step. -# """ -# logger = my_logger() -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-problems.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# # copy tbl without errors to error prokka dir -# tbl_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# tblout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# shutil.copyfile(tbl_init, tblout) -# # copy gff without errors to error prokka dir -# gff_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# gffout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# shutil.copyfile(gff_init, gffout) -# # copy ffn without error to error prokka dir -# ffn_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".ffn") -# ffn_ok = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".ffn") -# ffn_error = ffn_ok + "-error" -# # change name of ffn file with error to keep it for later (used for tests) -# shutil.copyfile(ffn_ok, ffn_error) -# # copy ffn without error to prokka res (erasing ffn with error) -# shutil.copyfile(ffn_init, ffn_ok) -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# assert not ffunc.format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, -# gene_dir, rep_dir, gff_dir, logger[1]) -# # Check that all files were not created -# assert not os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# msg = ("Unknown header format >EPKOMDHM00003 hypothetical protein in " -# "test/data/annotate/exp_files/B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes/" -# "test.0417.00002.faa. " -# "Error: invalid literal for int() with base 10: '>EPKOMDHM00003'\n" -# "Prt file not created from test/data/annotate/exp_files/" -# "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes/test.0417.00002.faa.") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg -# # remove files which were copied for this test (tblout). And rename ffn with errors -# # to its original name. -# os.rename(ffn_error, ffn_ok) -# os.remove(tblout) -# os.remove(gffout) - - -# def test_format1genome_problemgff(): -# """ -# Test that formatting a genome (making .prt, .gen, .fna, .lst) returns an error message -# and does not create any output file if there is a problem while converting the -# .ffn to .gen -# """ -# logger = my_logger() -# gpath = os.path.join("test", "data", "annotate", "genomes", -# "B2_A3_5.fasta-problems.fna-short-contig.fna") -# name = "test.0417.00002" -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# tbl_init = os.path.join(prok_path, "B2_A3_5.fasta-split5N.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# tblout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".tbl") -# shutil.copyfile(tbl_init, tblout) -# gff_init = os.path.join("test", "data", "annotate", "test_files", "prokka_out_gff-error.gff") -# gffout = os.path.join(prok_path, "B2_A3_5.fasta-problems.fna-short-contig.fna-prokkaRes", -# name + ".gff") -# shutil.copyfile(gff_init, gffout) -# lst_dir = os.path.join("test", "data", "annotate") -# prot_dir = lst_dir -# gene_dir = lst_dir -# rep_dir = lst_dir -# gff_dir = lst_dir -# assert not ffunc.format_one_genome(gpath, name, prok_path, lst_dir, prot_dir, -# gene_dir, rep_dir, gff_dir, logger[1]) -# # Check that all files were not created -# assert not os.path.isfile(os.path.join(lst_dir, name + ".lst")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".fna")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".prt")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gen")) -# assert not os.path.isfile(os.path.join(lst_dir, name + ".gff")) -# msg = ("Problem with your gff file. '##FASTA' is not a gff entry line, whereas it should " -# "correspond to '11249\t12328\tD\tCDS\ttest.0417.00002.i0002_00016\tNA\t" -# "| hypothetical protein | NA | NA'") -# q = logger[0] -# assert q.qsize() == 1 -# assert q.get().message == msg -# # remove tblout which was copied for this test -# os.remove(tblout) -# os.remove(gffout) - - -# def test_format_all(): + # Create prokka output dir and pur expected files (empty, we want to generate an error) + gpath = os.path.join(GENEPATH, "wrong.fasta") + open(gpath, "w").close() + prodi_path = gpath + "-prodigalRes" + os.makedirs(prodi_path) + gff_res = os.path.join(prodi_path, "toto.gff") + ffn_res = os.path.join(prodi_path, "toto.ffn") + faa_res = os.path.join(prodi_path, "toto.faa") + for file in [gff_res, ffn_res, faa_res]: + open(file, "w").close() + # Create output directory for .fna file + rep_dir = os.path.join(GENEPATH, "Replicons") + os.makedirs(rep_dir) + # Get args for function + args = ("wrong.fasta", "name", gpath, GENEPATH, "lst/dir", "prot/dir", + "gene/dir", rep_dir, "gff/dir", True, my_logger()[0]) + ok_format, genome = ffunc.handle_genome(args) + assert ok_format == False + assert genome == "wrong.fasta" + + +def test_handle_genome_formatok(caplog): + """ + Test that when we try to format a genome which was annotated by prokka without any problem + It returns True associated with the genome name + """ + caplog.set_level(logging.DEBUG) + name = "test.0417.00002" + # path to original genome, given to prodigal for annotation + gpath = os.path.join(ANNOTEDIR, "test_files", "original_name.fna") + prok_path = os.path.join(ANNOTEDIR, "test_files") + # Create result directories + prot_dir = os.path.join(GENEPATH, "Proteins") + lst_dir = os.path.join(GENEPATH, "LSTINFO") + rep_dir = os.path.join(GENEPATH, "Replicons") + gene_dir = os.path.join(GENEPATH, "Genes") + gff_dir = os.path.join(GENEPATH, "gff") + os.makedirs(prot_dir) + os.makedirs(lst_dir) + os.makedirs(rep_dir) + os.makedirs(gene_dir) + os.makedirs(gff_dir) + # Get args for function + args = ("original_name", name, gpath, prok_path, lst_dir, prot_dir, + gene_dir, rep_dir, gff_dir, False, my_logger()[0]) + ok_format, genome = ffunc.handle_genome(args) + assert ok_format == True + assert genome == "original_name" + # Check generated files + exp_rep = os.path.join(EXP_ANNOTE, "res_created_rep-prokka.fna") + res_rep_file = os.path.join(rep_dir, "test.0417.00002.fna") + assert tutil.compare_order_content(exp_rep, res_rep_file) + # Proteins + exp_prt = os.path.join(EXP_ANNOTE, "res_create_prt_prokka.faa") + res_prt_file = os.path.join(prot_dir, "test.0417.00002.prt") + assert tutil.compare_order_content(exp_prt, res_prt_file) + # Genes + exp_gen = os.path.join(EXP_ANNOTE, "res_create_gene_prokka.gen") + res_gen_file = os.path.join(gene_dir, "test.0417.00002.gen") + assert tutil.compare_order_content(exp_gen, res_gen_file) + # LSTINFO + exp_lst = os.path.join(EXP_ANNOTE, "res_create_lst-prokka.lst") + res_lst_file = os.path.join(lst_dir, "test.0417.00002.lst") + assert tutil.compare_order_content(exp_lst, res_lst_file) + # gff + exp_gff = os.path.join(EXP_ANNOTE, "res_create_gff-prokka.gff") + res_gff_file = os.path.join(gff_dir, "test.0417.00002.gff") + assert tutil.compare_order_content(exp_gff, res_gff_file) + + +def test_handle_genome_formatok_prodigal(caplog): + """ + Test that when we try to format a genome which was annotated by prodigal without any problem + It returns True associated with the genome name + """ + caplog.set_level(logging.DEBUG) + name_orig = "prodigal.outtest.ok" + name = "test.0417.00002" + # path to original genome, given to prodigal for annotation + gpath = os.path.join(ANNOTEDIR, "test_files", "original_name.fna") + prodi_path = os.path.join(ANNOTEDIR, "test_files") + # Create result directories + prot_dir = os.path.join(GENEPATH, "Proteins") + lst_dir = os.path.join(GENEPATH, "LSTINFO") + rep_dir = os.path.join(GENEPATH, "Replicons") + gene_dir = os.path.join(GENEPATH, "Genes") + gff_dir = os.path.join(GENEPATH, "gff") + os.makedirs(prot_dir) + os.makedirs(lst_dir) + os.makedirs(rep_dir) + os.makedirs(gene_dir) + os.makedirs(gff_dir) + # Get args for function + args = (name_orig, name, gpath, prodi_path, lst_dir, prot_dir, + gene_dir, rep_dir, gff_dir, True, my_logger()[0]) + ok_format, genome = ffunc.handle_genome(args) + assert ok_format == True + assert genome == name_orig + # Check generated files + exp_rep = os.path.join(EXP_ANNOTE, "res_created_rep-prokka.fna") + res_rep_file = os.path.join(rep_dir, "test.0417.00002.fna") + assert tutil.compare_order_content(exp_rep, res_rep_file) + # Proteins + exp_prt = os.path.join(EXP_ANNOTE, "res_create_prt_prodigal.faa") + res_prt_file = os.path.join(prot_dir, "test.0417.00002.prt") + assert tutil.compare_order_content(exp_prt, res_prt_file) + # Genes + exp_gen = os.path.join(EXP_ANNOTE, "res_create_gene_lst_prodigal.gen") + res_gen_file = os.path.join(gene_dir, "test.0417.00002.gen") + assert tutil.compare_order_content(exp_gen, res_gen_file) + # LSTINFO + exp_lst = os.path.join(EXP_ANNOTE, "res_create_gene_lst_prodigal.lst") + res_lst_file = os.path.join(lst_dir, "test.0417.00002.lst") + assert tutil.compare_order_content(exp_lst, res_lst_file) + # gff + exp_gff = os.path.join(EXP_ANNOTE, "res_create_gff_prodigal.gff") + res_gff_file = os.path.join(gff_dir, "test.0417.00002.gff") + assert tutil.compare_order_content(exp_gff, res_gff_file) + + +# def test_format_all_prokka(): # """ # Test that when giving a list of genomes, for which prokka ran without problem, # they are formatted, with all expected files created. # """ -# # genomes = {genome: [name, gpath, size, nbcont, l90]} +# # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]} # initnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"] -# initpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in initnames] +# initpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in initnames] # gnames = ["H299_H561.fasta-short-contig.fna", "B2_A3_5.fasta-changeName.fna-short-contig.fna"] # onames = ["test_runprokka_H299", "test.0417.00002"] -# gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] +# gpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in gnames] # for f1, f2 in zip(initpaths, gpaths): # shutil.copyfile(f1, f2) -# genomes = {gnames[0]: [onames[0], gpaths[0], 12656, 3, 1], -# gnames[1]: [onames[1], gpaths[1], 456464645, 5, 1] +# genomes = {gnames[0]: [onames[0], gpaths[0], gpaths[0], 12656, 3, 1], +# gnames[1]: [onames[1], gpaths[1], gpaths[1], 456464645, 5, 1] # } -# prok_path = os.path.join("test", "data", "annotate", "exp_files") -# res_path = os.path.join("test", "data", "annotate") -# results = {gname: True for gname in gnames} -# skipped, skipped_format = ffunc.format_genomes(genomes, results, res_path, -# prok_path, threads=4) -# assert skipped == [] +# prok_path = os.path.join(ANNOTEDIR, "exp_files") +# res_path = GENEPATH +# skipped_format = ffunc.format_genomes(genomes, res_path, +# prok_path, False, threads=4) # assert skipped_format == [] # lstfiles = [os.path.join(res_path, "LSTINFO", name + ".lst") for name in onames] # prtfiles = [os.path.join(res_path, "Proteins", name + ".prt") for name in onames] @@ -1042,8 +320,6 @@ def test_handle_genome_nores(): # shutil.rmtree(os.path.join(res_path, "Genes")) # shutil.rmtree(os.path.join(res_path, "Replicons")) # shutil.rmtree(os.path.join(res_path, "gff3")) -# for f in gpaths: -# os.remove(f) # def test_format_all_result_false():