Skip to content
Snippets Groups Projects
Commit aa0a86b6 authored by Amandine PERRIN
Browse files

.grp.<nucl or aa>.aln -> .<nucl or aa>.grp.aln

parent c07fc6b4
No related branches found
No related tags found
No related merge requests found
......@@ -78,7 +78,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
all_alns_nucl, status_nucl = concat_alignments(fam_nums, prefix, "nucl", quiet)
treedir = os.path.join(outdir, "Phylo-" + dname)
os.makedirs(treedir, exist_ok=True)
outfile_nucl = os.path.join(treedir, dname + ".grp.nucl.aln")
outfile_nucl = os.path.join(treedir, dname + ".nucl.grp.aln")
res_nucl = launch_group_by_genome(all_genomes, all_alns_nucl, status_nucl, outfile_nucl, dname, "nucleic", quiet)
if not res_nucl:
utils.remove(all_alns_nucl)
......@@ -87,7 +87,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
sys.exit(1)
if prot_ali:
all_alns_aa, status_aa = concat_alignments(fam_nums, prefix, "aa", quiet)
outfile_aa = os.path.join(treedir, dname + ".grp.aa.aln")
outfile_aa = os.path.join(treedir, dname + ".aa.grp.aln")
res_aa = launch_group_by_genome(all_genomes, all_alns_aa, status_aa, outfile_aa, dname, "protein", quiet)
if not res_aa:
utils.remove(all_alns_aa)
......@@ -104,8 +104,6 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
----------
fam_nums : []
list of family numbers
ali_type: str
nucl or aa
prefix : str
path to ``aldir/<name of dataset>-[mafft-align or mafft-prt2nuc]``
(used to get extraction, alignment and btr files easily)
......@@ -129,7 +127,7 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
else:
logger.error(f"Not possible to concatenate '{ali_type}' type of alignments.")
sys.exit(1)
output = f"{prefix}-complete.cat.{ali_type}.aln"
output = f"{prefix}-complete.{ali_type}.cat.aln"
if os.path.isfile(output):
logger.info(f"{ali_type} alignments already concatenated")
logger.warning(f"{ali_type} alignments already concatenated in {output}. Program will use "
......@@ -169,7 +167,7 @@ def launch_group_by_genome(all_genomes, all_alns, status, outfile, dname, type_a
dname : str
name of dataset
type_ali : str
nucleic or aa
nucleic or protein
quiet : bool
True if nothing must be sent to stdout/stderr, False otherwise
......@@ -232,7 +230,6 @@ def group_by_genome(args):
"""
all_genomes, all_alns, outfile = args
sequences = read_alignments(all_alns, all_genomes)
logger.info(sequences)
if not sequences:
return False
write_groups(outfile, sequences)
......
......@@ -298,7 +298,7 @@ def test_concat_nucl(caplog):
fam_nums = [1, 8, 11]
quiet = False
output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.nucl.aln")
assert output == os.path.join(aldir, dname + "-complete.nucl.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done"
......@@ -330,7 +330,7 @@ def test_concat_aa(caplog):
fam_nums = [1, 8, 11]
quiet = False
output, mess = pal.concat_alignments(fam_nums, prefix, "aa", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.aa.aln")
assert output == os.path.join(aldir, dname + "-complete.aa.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done"
......@@ -362,7 +362,7 @@ def test_concat_quiet(caplog):
fam_nums = [1, 8, 11]
quiet = True
output, mess = pal.concat_alignments(fam_nums, prefix, "nucl", quiet)
assert output == os.path.join(aldir, dname + "-complete.cat.nucl.aln")
assert output == os.path.join(aldir, dname + "-complete.nucl.cat.aln")
ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(output, ref_concat)
assert mess == "Done"
......@@ -422,7 +422,7 @@ def test_concat_outexists(caplog):
shutil.copyfile(orig_btr8, btr8)
shutil.copyfile(orig_btr11, btr11)
# Create empty concatenated file
outempty = os.path.join(aldir, dname + "-complete.cat.aa.aln")
outempty = os.path.join(aldir, dname + "-complete.aa.cat.aln")
open(outempty, "w").close()
# Other parameters, and run concatenation
fam_nums = [1, 8, 11]
......@@ -435,7 +435,7 @@ def test_concat_outexists(caplog):
assert "aa alignments already concatenated" in caplog.text
assert ("aa alignments already concatenated in "
"test/data/align/generated_by_unit-tests/test_concat_aldir/"
"TESTconcat-complete.cat.aa.aln. "
"TESTconcat-complete.aa.cat.aln. "
"Program will use it for next steps") in caplog.text
......@@ -492,24 +492,24 @@ def test_postalign(caplog):
# print(caplog.text)
# CHECK CONCAT
# Check that concatenated file in nucl is created and with expected content
out_concat_nucl = os.path.join(aldir, dname + "-complete.cat.nucl.aln")
out_concat_nucl = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
assert os.path.isfile(out_concat_nucl)
ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(out_concat_nucl, ref_concat_nucl)
# Check concatenated in aa
out_concat_aa = os.path.join(aldir, dname + "-complete.cat.aa.aln")
out_concat_aa = os.path.join(aldir, dname + "-complete.aa.cat.aln")
assert os.path.isfile(out_concat_aa)
ref_concat_aa = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(out_concat_aa, ref_concat_aa)
# CHECK GROUPED
# Check that grouped by genome file in nucleotides is created, with expected content
treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln")
out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert os.path.isfile(out_grp)
exp_grp = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aln")
assert tutil.compare_order_content(out_grp, exp_grp)
# Check aa alignment grouped by genome
out_grp_aa = os.path.join(treedir, dname + ".grp.aa.aln")
out_grp_aa = os.path.join(treedir, dname + ".aa.grp.aln")
assert os.path.isfile(out_grp_aa)
exp_grp_aa = os.path.join(EXPPATH, "exp_grp_4genomes-fam1-8-11.aa.aln")
assert tutil.compare_order_content(out_grp_aa, exp_grp_aa)
......@@ -588,11 +588,11 @@ def test_postalign_missgenome(caplog):
with pytest.raises(SystemExit):
pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet)
# Check that the concatenated file does not exist
out_concat = os.path.join(aldir, dname + "-complete.cat.nucl.aln")
out_concat = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
assert not os.path.isfile(out_concat)
# Check that grouped by genome file is not created
treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln")
out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert not os.path.isfile(out_grp)
# check logs
assert "Concatenating all nucl alignment files" in caplog.text
......@@ -631,7 +631,7 @@ def test_postalign_error_grpaa(caplog):
btr11 = os.path.join(aldir, dname + "-mafft-prt2nuc.11.aln")
ali1 = os.path.join(aldir, dname + "-mafft-align.1.aln")
ali11 = os.path.join(aldir, dname + "-mafft-align.11.aln")
concataa = os.path.join(aldir, dname + "-complete.cat.aa.aln")
concataa = os.path.join(aldir, dname + "-complete.aa.cat.aln")
shutil.copyfile(orig_btr1, btr1)
shutil.copyfile(orig_btr8, btr8)
shutil.copyfile(orig_btr11, btr11)
......@@ -639,9 +639,9 @@ def test_postalign_error_grpaa(caplog):
shutil.copyfile(orig_ali11, ali11)
shutil.copyfile(orig_concat_aa, concataa)
# Run post-alignment
out_concat = os.path.join(aldir, dname + "-complete.cat.nucl.aln")
out_concat = os.path.join(aldir, dname + "-complete.nucl.cat.aln")
treedir = os.path.join(outdir, "Phylo-" + dname)
out_grp = os.path.join(treedir, dname + ".grp.nucl.aln")
out_grp = os.path.join(treedir, dname + ".nucl.grp.aln")
assert pal.post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet) == out_grp
# Check that concatenated file is created and with expected content
ref_concat_nucl = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
......@@ -655,7 +655,7 @@ def test_postalign_error_grpaa(caplog):
assert "Concatenating all nucl alignment files" in caplog.text
assert "Grouping nucleic alignments per genome" in caplog.text
assert ("aa alignments already concatenated in test/data/align/generated_by_unit-tests/"
"test_post-align/aldir_post-align/TESTpost-complete.cat.aa.aln. "
"test_post-align/aldir_post-align/TESTpost-complete.aa.cat.aln. "
"Program will use it for next steps. If you want to redo it, "
"remove it before running.") in caplog.text
assert "Grouping protein alignments per genome" in caplog.text
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment