Skip to content
Snippets Groups Projects
Commit fc2f89bf authored by Amandine  PERRIN's avatar Amandine PERRIN
Browse files

Add start of unit tests for 'format_all_genomes'

test_format_1pb_prodigal: the files created for the test are OK. The test must now be adapted to check the expected outputs.
parent c50131db
No related branches found
No related tags found
No related merge requests found
Pipeline #39124 passed
...@@ -84,7 +84,7 @@ def format_genomes(genomes_ok, res_path, annot_path, prodigal_only, threads=1, q ...@@ -84,7 +84,7 @@ def format_genomes(genomes_ok, res_path, annot_path, prodigal_only, threads=1, q
res_path : str res_path : str
path to folder where the 4 directories must be created path to folder where the 4 directories must be created
annot_path : str annot_path : str
path to folder named "<genome_name>-[prokka, prodigal]Res" where all prokka/prodigal path to folder containing "<genome_name>-[prokka, prodigal]Res" where all prokka/prodigal
results are saved. results are saved.
prodigal_only: True if it was annotated by prodigal, False if annotated by prokka prodigal_only: True if it was annotated by prodigal, False if annotated by prokka
threads : int threads : int
...@@ -94,10 +94,9 @@ def format_genomes(genomes_ok, res_path, annot_path, prodigal_only, threads=1, q ...@@ -94,10 +94,9 @@ def format_genomes(genomes_ok, res_path, annot_path, prodigal_only, threads=1, q
Returns Returns
------- -------
(skipped, skipped_format) : tuple skipped_format : list
* skipped : list of genomes skipped because they had a problem in annotation step list of genomes skipped because they had a problem in format step
* skipped_format : list of genomes skipped because they had a problem in format step
""" """
main_logger.info("Formatting all genomes") main_logger.info("Formatting all genomes")
lst_dir = os.path.join(res_path, "LSTINFO") lst_dir = os.path.join(res_path, "LSTINFO")
......
...@@ -39,7 +39,7 @@ def setup_teardown_module(): ...@@ -39,7 +39,7 @@ def setup_teardown_module():
print("setup") print("setup")
yield yield
shutil.rmtree(GENEPATH) # shutil.rmtree(GENEPATH)
print("teardown") print("teardown")
# Define variables and functions used by several tests # Define variables and functions used by several tests
...@@ -287,138 +287,124 @@ def test_handle_genome_formatok_prodigal(caplog): ...@@ -287,138 +287,124 @@ def test_handle_genome_formatok_prodigal(caplog):
assert tutil.compare_order_content(exp_gff, res_gff_file) assert tutil.compare_order_content(exp_gff, res_gff_file)
# def test_format_all_prokka(): def test_format_all_prokka(caplog):
# """ """
# Test that when giving a list of genomes, for which prokka ran without problem, Test that when giving a list of genomes, for which prokka ran without problem,
# they are formatted, with all expected files created. they are formatted, with all expected files created.
# """ """
# # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]} caplog.set_level(logging.DEBUG)
# initnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"] # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]}
# initpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in initnames] # Get genome names we want to format (with their path)
# gnames = ["H299_H561.fasta-short-contig.fna", "B2_A3_5.fasta-changeName.fna-short-contig.fna"] gnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"]
# onames = ["test_runprokka_H299", "test.0417.00002"] gpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in gnames]
# gpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in gnames] onames = ["test_runprokka_H299", "test.0417.00002"]
# for f1, f2 in zip(initpaths, gpaths): genomes = {gnames[0]: [onames[0], gpaths[0], gpaths[0], 12656, 3, 1],
# shutil.copyfile(f1, f2) gnames[1]: [onames[1], gpaths[1], gpaths[1], 456464645, 5, 1]
# genomes = {gnames[0]: [onames[0], gpaths[0], gpaths[0], 12656, 3, 1], }
# gnames[1]: [onames[1], gpaths[1], gpaths[1], 456464645, 5, 1] res_path = GENEPATH
# } annotated_path = os.path.join(ANNOTEDIR, "exp_files")
# prok_path = os.path.join(ANNOTEDIR, "exp_files") # Format both genomes
# res_path = GENEPATH skipped_format = ffunc.format_genomes(genomes, res_path, annotated_path, False, threads=2)
# skipped_format = ffunc.format_genomes(genomes, res_path, assert skipped_format == []
# prok_path, False, threads=4) # Get all names of expected output files
# assert skipped_format == [] exp_dir = os.path.join(EXP_ANNOTE, "res_formatAll", "prokka")
# lstfiles = [os.path.join(res_path, "LSTINFO", name + ".lst") for name in onames] exp_folders = ["LSTINFO", "Proteins", "Genes", "Replicons", "gff3"]
# prtfiles = [os.path.join(res_path, "Proteins", name + ".prt") for name in onames] exp_extensions = [".lst", ".prt", ".gen", ".fna", ".gff"]
# genfiles = [os.path.join(res_path, "Genes", name + ".gen") for name in onames] # Check that output files are created, and contain what is expected
# repfiles = [os.path.join(res_path, "Replicons", name + ".fna") for name in onames] for fol, ext in zip(exp_folders, exp_extensions):
# gfffiles = [os.path.join(res_path, "gff3", name + ".gff") for name in onames] exp_files = [os.path.join(exp_dir, fol, name + ext) for name in onames]
# for f in lstfiles + prtfiles + genfiles + repfiles + gfffiles: res_files = [os.path.join(res_path, fol, name + ext) for name in onames]
# assert os.path.isfile(f) for res, exp in zip(res_files, exp_files):
# shutil.rmtree(os.path.join(res_path, "LSTINFO")) assert os.path.isfile(res)
# shutil.rmtree(os.path.join(res_path, "Proteins")) assert tutil.compare_order_content(res, exp)
# shutil.rmtree(os.path.join(res_path, "Genes")) # Check log
# shutil.rmtree(os.path.join(res_path, "Replicons")) assert "Formatting all genomes" in caplog.text
# shutil.rmtree(os.path.join(res_path, "gff3"))
def test_format_all_prodigal(caplog):
# def test_format_all_result_false(): """
# """ Test that when giving a list of genomes, for which prodigal ran without problem,
# Test that when giving a list of 2 genomes, 1 for which prokka ran without problem, they are formatted, with all expected files created.
# 1 for which prokka had problems (given with False in results), """
# the correct genome is formatted, with all caplog.set_level(logging.DEBUG)
# expected files created, and the genome with problems is not formatted. # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]}
# """ # Get genome names we want to format (with their path)
# # genomes = {genome: [name, gpath, size, nbcont, l90]} gnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"]
# initnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"] gpaths = [os.path.join(ANNOTEDIR, "genomes", name) for name in gnames]
# initpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in initnames] onames = ["test_runprokka_H299", "test.0417.00002"]
# gnames = ["H299_H561.fasta-short-contig.fna", "B2_A3_5.fasta-changeName.fna-short-contig.fna"] genomes = {gnames[0]: [onames[0], gpaths[0], gpaths[0], 12656, 3, 1],
# onames = ["test_runprokka_H299", "test.0417.00002"] gnames[1]: [onames[1], gpaths[1], gpaths[1], 456464645, 5, 1]
# gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] }
# for f1, f2 in zip(initpaths, gpaths): res_path = GENEPATH
# shutil.copyfile(f1, f2) annotated_path = os.path.join(ANNOTEDIR, "exp_files")
# genomes = {gnames[0]: [onames[0], gpaths[0], 12656, 3, 1], # Format both genomes
# gnames[1]: [onames[1], gpaths[1], 456464645, 5, 1] skipped_format = ffunc.format_genomes(genomes, res_path, annotated_path, True, threads=2)
# } assert skipped_format == []
# prok_path = os.path.join("test", "data", "annotate", "exp_files") # Get all names of expected output files
# res_path = os.path.join("test", "data", "annotate") exp_dir = os.path.join(EXP_ANNOTE, "res_formatAll", "prodigal")
# results = {gnames[0]: True, gnames[1]: False} exp_folders = ["LSTINFO", "Proteins", "Genes", "Replicons", "gff3"]
# skipped, skipped_format = ffunc.format_genomes(genomes, results, res_path, prok_path) exp_extensions = [".lst", ".prt", ".gen", ".fna", ".gff"]
# assert skipped == ["B2_A3_5.fasta-changeName.fna-short-contig.fna"] # Check that output files are created, and contain what is expected
# assert skipped_format == [] for fol, ext in zip(exp_folders, exp_extensions):
# lstfiles = os.path.join(res_path, "LSTINFO") exp_files = [os.path.join(exp_dir, fol, name + ext) for name in onames]
# prtfiles = os.path.join(res_path, "Proteins") res_files = [os.path.join(res_path, fol, name + ext) for name in onames]
# genfiles = os.path.join(res_path, "Genes") for res, exp in zip(res_files, exp_files):
# repfiles = os.path.join(res_path, "Replicons") assert os.path.isfile(res)
# gfffiles = os.path.join(res_path, "gff3") assert tutil.compare_order_content(res, exp)
# assert os.path.isfile(os.path.join(lstfiles, onames[0] + ".lst")) # Check log
# assert not os.path.isfile(os.path.join(lstfiles, onames[1] + ".lst")) assert "Formatting all genomes" in caplog.text
# assert os.path.isfile(os.path.join(prtfiles, onames[0] + ".prt"))
# assert not os.path.isfile(os.path.join(prtfiles, onames[1] + ".prt"))
# assert os.path.isfile(os.path.join(genfiles, onames[0] + ".gen")) def test_format_1pb_prodigal(caplog):
# assert not os.path.isfile(os.path.join(genfiles, onames[1] + ".gen")) """
# assert os.path.isfile(os.path.join(repfiles, onames[0] + ".fna")) Test that when giving a list of genomes, 1 that is correctly formatted, and 1 has a pb,
# assert not os.path.isfile(os.path.join(repfiles, onames[1] + ".fna")) it returns the last one in skipped_format
# assert os.path.isfile(os.path.join(gfffiles, onames[0] + ".gff")) """
# assert not os.path.isfile(os.path.join(gfffiles, onames[1] + ".gff")) caplog.set_level(logging.DEBUG)
# shutil.rmtree(os.path.join(res_path, "LSTINFO")) # GENOME 1: Create empty original genome file
# shutil.rmtree(os.path.join(res_path, "Proteins")) genome1 = "wrong.fasta"
# shutil.rmtree(os.path.join(res_path, "Genes")) gpath1 = os.path.join(GENEPATH, "wrong.fasta")
# shutil.rmtree(os.path.join(res_path, "Replicons")) open(gpath1, "w").close()
# shutil.rmtree(os.path.join(res_path, "gff3")) # Add prodigal (empty) result files to prodigalRes directory
# for f in gpaths: prodi_path = gpath1 + "-prodigalRes"
# os.remove(f) os.makedirs(prodi_path)
gff_res = os.path.join(prodi_path, "toto.gff")
ffn_res = os.path.join(prodi_path, "toto.ffn")
# def test_format_all_not_result(): faa_res = os.path.join(prodi_path, "toto.faa")
# """ for file in [gff_res, ffn_res, faa_res]:
# Test that when giving a list of 2 genomes, but only 1 is in the results list (and prokka ran open(file, "w").close()
# without problems for it), the correct genome is formatted, with all # Create output directory for .fna file
# expected files created, and the other genome is not formatted, and does not appear in rep_dir = os.path.join(GENEPATH, "Replicons")
# skipped list (as it was removed from the study before annotation step, probably by QC). os.makedirs(rep_dir)
# """ # GENOME 2
# # genomes = {genome: [name, gpath, size, nbcont, l90]} genome2 = "H299_H561.fasta"
# initnames = ["H299_H561.fasta", "B2_A3_5.fasta-changeName.fna"] gpath2 = os.path.join(ANNOTEDIR, "genomes", genome2)
# initpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in initnames] # Copy results of prodigal for this genome to output dir (GENEPATH)
# gnames = ["H299_H561.fasta-short-contig.fna", "B2_A3_5.fasta-changeName.fna-short-contig.fna"] orig_res_files = os.path.join(EXP_ANNOTE, genome2 + '-prodigalRes')
# onames = ["test_runprokka_H299", "test.0417.00002"] used_res_path = os.path.join(GENEPATH, genome2 + "-prodigalRes")
# gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] shutil.copytree(orig_res_files, used_res_path)
# for f1, f2 in zip(initpaths, gpaths): # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]}
# shutil.copyfile(f1, f2) genomes = {genome1: ["test_genome1", gpath1, gpath1, 12656, 3, 1],
# genomes = {gnames[0]: [onames[0], gpaths[0], 12656, 3, 1], genome2: ["test_H299_H561", gpath2, gpath2, 456464645, 5, 1]
# gnames[1]: [onames[1], gpaths[1], 456464645, 5, 1] }
# } res_path = GENEPATH
# prok_path = os.path.join("test", "data", "annotate", "exp_files") annotated_path = GENEPATH
# res_path = os.path.join("test", "data", "annotate") # Format both genomes
# results = {gnames[0]: True} skipped_format = ffunc.format_genomes(genomes, res_path, annotated_path, True, threads=2)
# skipped, skipped_format = ffunc.format_genomes(genomes, results, res_path, prok_path) assert skipped_format == ["wrong.fasta"]
# assert skipped == [] # Get all names of expected output files
# assert skipped_format == [] exp_dir = os.path.join(EXP_ANNOTE, "res_formatAll", "prodigal")
# lstfiles = os.path.join(res_path, "LSTINFO") exp_folders = ["LSTINFO", "Proteins", "Genes", "Replicons", "gff3"]
# prtfiles = os.path.join(res_path, "Proteins") exp_extensions = [".lst", ".prt", ".gen", ".fna", ".gff"]
# genfiles = os.path.join(res_path, "Genes") # Check that output files are created, and contain what is expected
# repfiles = os.path.join(res_path, "Replicons") for fol, ext in zip(exp_folders, exp_extensions):
# gfffiles = os.path.join(res_path, "gff3") exp_files = [os.path.join(exp_dir, fol, name + ext) for name in onames]
# assert os.path.isfile(os.path.join(lstfiles, onames[0] + ".lst")) res_files = [os.path.join(res_path, fol, name + ext) for name in onames]
# assert not os.path.isfile(os.path.join(lstfiles, onames[1] + ".lst")) for res, exp in zip(res_files, exp_files):
# assert os.path.isfile(os.path.join(prtfiles, onames[0] + ".prt")) assert os.path.isfile(res)
# assert not os.path.isfile(os.path.join(prtfiles, onames[1] + ".prt")) assert tutil.compare_order_content(res, exp)
# assert os.path.isfile(os.path.join(genfiles, onames[0] + ".gen")) # Check log
# assert not os.path.isfile(os.path.join(genfiles, onames[1] + ".gen")) assert "Formatting all genomes" in caplog.text
# assert os.path.isfile(os.path.join(repfiles, onames[0] + ".fna"))
# assert not os.path.isfile(os.path.join(repfiles, onames[1] + ".fna"))
# assert os.path.isfile(os.path.join(gfffiles, onames[0] + ".gff"))
# assert not os.path.isfile(os.path.join(gfffiles, onames[1] + ".gff"))
# shutil.rmtree(os.path.join(res_path, "LSTINFO"))
# shutil.rmtree(os.path.join(res_path, "Proteins"))
# shutil.rmtree(os.path.join(res_path, "Genes"))
# shutil.rmtree(os.path.join(res_path, "Replicons"))
# shutil.rmtree(os.path.join(res_path, "gff3"))
# for f in gpaths:
# os.remove(f)
# # probleme avec .fna de onames[0] qui n'est pas créé...
# def test_format_all_error(): # def test_format_all_error():
# """ # """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment