Skip to content
Snippets Groups Projects
Commit aa0a86b6 authored by Amandine PERRIN
Browse files

.grp.<nucl or aa>.aln -> .<nucl or aa>.grp.aln

parent c07fc6b4
No related branches found
No related tags found
No related merge requests found
...@@ -78,7 +78,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet ...@@ -78,7 +78,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
all_alns_nucl, status_nucl = concat_alignments(fam_nums, prefix, "nucl", quiet) all_alns_nucl, status_nucl = concat_alignments(fam_nums, prefix, "nucl", quiet)
treedir = os.path.join(outdir, "Phylo-" + dname) treedir = os.path.join(outdir, "Phylo-" + dname)
os.makedirs(treedir, exist_ok=True) os.makedirs(treedir, exist_ok=True)
outfile_nucl = os.path.join(treedir, dname + ".grp.nucl.aln") outfile_nucl = os.path.join(treedir, dname + ".nucl.grp.aln")
res_nucl = launch_group_by_genome(all_genomes, all_alns_nucl, status_nucl, outfile_nucl, dname, "nucleic", quiet) res_nucl = launch_group_by_genome(all_genomes, all_alns_nucl, status_nucl, outfile_nucl, dname, "nucleic", quiet)
if not res_nucl: if not res_nucl:
utils.remove(all_alns_nucl) utils.remove(all_alns_nucl)
...@@ -87,7 +87,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet ...@@ -87,7 +87,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
sys.exit(1) sys.exit(1)
if prot_ali: if prot_ali:
all_alns_aa, status_aa = concat_alignments(fam_nums, prefix, "aa", quiet) all_alns_aa, status_aa = concat_alignments(fam_nums, prefix, "aa", quiet)
outfile_aa = os.path.join(treedir, dname + ".grp.aa.aln") outfile_aa = os.path.join(treedir, dname + ".aa.grp.aln")
res_aa = launch_group_by_genome(all_genomes, all_alns_aa, status_aa, outfile_aa, dname, "protein", quiet) res_aa = launch_group_by_genome(all_genomes, all_alns_aa, status_aa, outfile_aa, dname, "protein", quiet)
if not res_aa: if not res_aa:
utils.remove(all_alns_aa) utils.remove(all_alns_aa)
...@@ -104,8 +104,6 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet): ...@@ -104,8 +104,6 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
---------- ----------
fam_nums : [] fam_nums : []
list of family numbers list of family numbers
ali_type: str
nucl or aa
prefix : str prefix : str
path to ``aldir/<name of dataset>-[mafft-align or mafft-prt2nuc]`` path to ``aldir/<name of dataset>-[mafft-align or mafft-prt2nuc]``
(used to get extraction, alignment and btr files easily) (used to get extraction, alignment and btr files easily)
...@@ -129,7 +127,7 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet): ...@@ -129,7 +127,7 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
else: else:
logger.error(f"Not possible to concatenate '{ali_type}' type of alignments.") logger.error(f"Not possible to concatenate '{ali_type}' type of alignments.")
sys.exit(1) sys.exit(1)
output = f"{prefix}-complete.cat.{ali_type}.aln" output = f"{prefix}-complete.{ali_type}.cat.aln"
if os.path.isfile(output): if os.path.isfile(output):
logger.info(f"{ali_type} alignments already concatenated") logger.info(f"{ali_type} alignments already concatenated")
logger.warning(f"{ali_type} alignments already concatenated in {output}. Program will use " logger.warning(f"{ali_type} alignments already concatenated in {output}. Program will use "
...@@ -169,7 +167,7 @@ def launch_group_by_genome(all_genomes, all_alns, status, outfile, dname, type_a ...@@ -169,7 +167,7 @@ def launch_group_by_genome(all_genomes, all_alns, status, outfile, dname, type_a
dname : str dname : str
name of dataset name of dataset
type_ali : str type_ali : str
nucleic or aa nucleic or protein
quiet : bool quiet : bool
True if nothing must be sent to sdtout/stderr, False otherwise True if nothing must be sent to sdtout/stderr, False otherwise
...@@ -232,7 +230,6 @@ def group_by_genome(args): ...@@ -232,7 +230,6 @@ def group_by_genome(args):
""" """
all_genomes, all_alns, outfile = args all_genomes, all_alns, outfile = args
sequences = read_alignments(all_alns, all_genomes) sequences = read_alignments(all_alns, all_genomes)
logger.info(sequences)
if not sequences: if not sequences:
return False return False
write_groups(outfile, sequences) write_groups(outfile, sequences)
......
...@@ -298,7 +298,7 @@ def test_concat_nucl(caplog): ...@@ -298,7 +298,7 @@ def test_concat_nucl(caplog):
fam_nums = [1, 8, 11] fam_nums = [1, 8, 11]
quiet = False quiet = False
output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet) output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.nucl.aln") assert output == os.path.join(aldir, dname + "-complete.nucl.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln") ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(output, ref_concat) assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done" assert mess == "Done"
...@@ -330,7 +330,7 @@ def test_concat_aa(caplog): ...@@ -330,7 +330,7 @@ def test_concat_aa(caplog):
fam_nums = [1, 8, 11] fam_nums = [1, 8, 11]
quiet = False quiet = False
output, mess = pal.concat_alignments(fam_nums, prefix, "aa", quiet) output, mess = pal.concat_alignments(fam_nums, prefix, "aa", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.aa.aln") assert output == os.path.join(aldir, dname + "-complete.aa.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln") ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(output, ref_concat) assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done" assert mess == "Done"
...@@ -362,7 +362,7 @@ def test_concat_quiet(caplog): ...@@ -362,7 +362,7 @@ def test_concat_quiet(caplog):
fam_nums = [1, 8, 11] fam_nums = [1, 8, 11]
quiet = True quiet = True
output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet) output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.nucl.aln") assert output == os.path.join(aldir, dname + "-complete.nucl.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln") ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(output, ref_concat) assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done" assert mess == "Done"
...@@ -422,7 +422,7 @@ def test_concat_outexists(caplog): ...@@ -422,7 +422,7 @@ def test_concat_outexists(caplog):
shutil.copyfile(orig_btr8, btr8) shutil.copyfile(orig_btr8, btr8)
shutil.copyfile(orig_btr11, btr11) shutil.copyfile(orig_btr11, btr11)
# Create empty concatenated file # Create empty concatenated file
outempty = os.path.join(aldir, dname + "-complete.cat.aa.aln") outempty = os.path.join(aldir, dname + "-complete.aa.cat.aln")
open(outempty, "w").close() open(outempty, "w").close()
# Other parameters, and run concatenation # Other parameters, and run concatenation
fam_nums = [1, 8, 11] fam_nums = [1, 8, 11]
...@@ -435,7 +435,7 @@ def test_concat_outexists(caplog): ...@@ -435,7 +435,7 @@ def test_concat_outexists(caplog):
assert "aa alignments already concatenated" in caplog.text assert "aa alignments already concatenated" in caplog.text
assert ("aa alignments already concatenated in " assert ("aa alignments already concatenated in "
"test/data/align/generated_by_unit-tests/test_concat_aldir/" "test/data/align/generated_by_unit-tests/test_concat_aldir/"
"TESTconcat-complete.cat.aa.aln. " "TESTconcat-complete.aa.cat.aln. "
"Program will use it for next steps") in caplog.text "Program will use it for next steps") in caplog.text
...@@ -492,24 +492,24 @@ def test_postalign(caplog): ...@@ -492,24 +492,24 @@ def test_postalign(caplog):
# print(caplog.text) # print(caplog.text)
# CHECK CONCAT # CHECK CONCAT
# Check that concatenated file in nucl is created and with expected content # Check that concatenated file in nucl is created and with expected content
out_concat_nucl = os.path.join(aldir, dname + "-complete.cat.nucl.aln") out_concat_nucl = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
assert os.path.isfile(out_concat_nucl) assert os.path.isfile(out_concat_nucl)
ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln") ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(out_concat_nucl, ref_concat_nucl) assert tutil.compare_order_content(out_concat_nucl, ref_concat_nucl)
# Check concatenated in aa # Check concatenated in aa
out_concat_aa = os.path.join(aldir, dname + "-complete.cat.aa.aln") out_concat_aa = os.path.join(aldir, dname + "-complete.aa.cat.aln")
assert os.path.isfile(out_concat_aa) assert os.path.isfile(out_concat_aa)
ref_concat_aa = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln") ref_concat_aa = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(out_concat_aa, ref_concat_aa) assert tutil.compare_order_content(out_concat_aa, ref_concat_aa)
# CHECK GROUPED # CHECK GROUPED
# Check that grouped by genome file in nucleotides is created, with expected content # Check that grouped by genome file in nucleotides is created, with expected content
treedir = os.path.join(outdir, "Phylo-" + dname) treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln") out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert os.path.isfile(out_grp) assert os.path.isfile(out_grp)
exp_grp = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aln") exp_grp = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(out_grp, exp_grp) assert tutil.compare_order_content(out_grp, exp_grp)
# Check aa alignment grouped by genome # Check aa alignment grouped by genome
out_grp_aa = os.path.join(treedir, dname + ".grp.aa.aln") out_grp_aa = os.path.join(treedir, dname + ".aa.grp.aln")
assert os.path.isfile(out_grp_aa) assert os.path.isfile(out_grp_aa)
exp_grp_aa = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aa.aln") exp_grp_aa = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(out_grp_aa, exp_grp_aa) assert tutil.compare_order_content(out_grp_aa, exp_grp_aa)
...@@ -588,11 +588,11 @@ def test_postalign_missgenome(caplog): ...@@ -588,11 +588,11 @@ def test_postalign_missgenome(caplog):
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet) pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet)
# Check that concatenated file is created and with expected content # Check that concatenated file is created and with expected content
out_concat = os.path.join(aldir, dname + "-complete.cat.nucl.aln") out_concat = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
assert not os.path.isfile(out_concat) assert not os.path.isfile(out_concat)
# Check that grouped by genome file is not created # Check that grouped by genome file is not created
treedir = os.path.join(outdir, "Phylo-" + dname) treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln") out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert not os.path.isfile(out_grp) assert not os.path.isfile(out_grp)
# check logs # check logs
assert "Concatenating all nucl alignment files" in caplog.text assert "Concatenating all nucl alignment files" in caplog.text
...@@ -631,7 +631,7 @@ def test_postalign_error_grpaa(caplog): ...@@ -631,7 +631,7 @@ def test_postalign_error_grpaa(caplog):
btr11 = os.path.join(aldir, dname + "-mafft-prt2nuc.11.aln") btr11 = os.path.join(aldir, dname + "-mafft-prt2nuc.11.aln")
ali1 = os.path.join(aldir, dname + "-mafft-align.1.aln") ali1 = os.path.join(aldir, dname + "-mafft-align.1.aln")
ali11 = os.path.join(aldir, dname + "-mafft-align.11.aln") ali11 = os.path.join(aldir, dname + "-mafft-align.11.aln")
concataa = os.path.join(aldir, dname + "-complete.cat.aa.aln") concataa = os.path.join(aldir, dname + "-complete.aa.cat.aln")
shutil.copyfile(orig_btr1, btr1) shutil.copyfile(orig_btr1, btr1)
shutil.copyfile(orig_btr8, btr8) shutil.copyfile(orig_btr8, btr8)
shutil.copyfile(orig_btr11, btr11) shutil.copyfile(orig_btr11, btr11)
...@@ -639,9 +639,9 @@ def test_postalign_error_grpaa(caplog): ...@@ -639,9 +639,9 @@ def test_postalign_error_grpaa(caplog):
shutil.copyfile(orig_ali11, ali11) shutil.copyfile(orig_ali11, ali11)
shutil.copyfile(orig_concat_aa, concataa) shutil.copyfile(orig_concat_aa, concataa)
# Run post-alignment # Run post-alignment
out_concat = os.path.join(aldir, dname + "-complete.cat.nucl.aln") out_concat = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
treedir = os.path.join(outdir, "Phylo-" + dname) treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln") out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet) == out_grp assert pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet) == out_grp
# Check that concatenated file is created and with expected content # Check that concatenated file is created and with expected content
ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln") ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
...@@ -655,7 +655,7 @@ def test_postalign_error_grpaa(caplog): ...@@ -655,7 +655,7 @@ def test_postalign_error_grpaa(caplog):
assert "Concatenating all nucl alignment files" in caplog.text assert "Concatenating all nucl alignment files" in caplog.text
assert "Grouping nucleic alignments per genome" in caplog.text assert "Grouping nucleic alignments per genome" in caplog.text
assert ("aa alignments already concatenated in test/data/align/generated_by_unit-tests/" assert ("aa alignments already concatenated in test/data/align/generated_by_unit-tests/"
"test_post-align/aldir_post-align/TESTpost-complete.cat.aa.aln. " "test_post-align/aldir_post-align/TESTpost-complete.aa.cat.aln. "
"Program will use it for next steps. If you want to redo it, " "Program will use it for next steps. If you want to redo it, "
"remove it before running.") in caplog.text "remove it before running.") in caplog.text
assert "Grouping protein alignments per genome" in caplog.text assert "Grouping protein alignments per genome" in caplog.text
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment