Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
aa0a86b6
Commit
aa0a86b6
authored
4 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
.grp.<nucl or aa>.aln -> .<nucl or aa>.grp.aln
parent
c07fc6b4
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
PanACoTA/align_module/post_align.py
+4
-7
4 additions, 7 deletions
PanACoTA/align_module/post_align.py
test/test_unit/test_align/test_postalign.py
+15
-15
15 additions, 15 deletions
test/test_unit/test_align/test_postalign.py
with
19 additions
and
22 deletions
PanACoTA/align_module/post_align.py
+
4
−
7
View file @
aa0a86b6
...
@@ -78,7 +78,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
...
@@ -78,7 +78,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
all_alns_nucl
,
status_nucl
=
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
all_alns_nucl
,
status_nucl
=
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
os
.
makedirs
(
treedir
,
exist_ok
=
True
)
os
.
makedirs
(
treedir
,
exist_ok
=
True
)
outfile_nucl
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
grp.
nucl.aln
"
)
outfile_nucl
=
os
.
path
.
join
(
treedir
,
dname
+
"
.nucl
.grp
.aln
"
)
res_nucl
=
launch_group_by_genome
(
all_genomes
,
all_alns_nucl
,
status_nucl
,
outfile_nucl
,
dname
,
"
nucleic
"
,
quiet
)
res_nucl
=
launch_group_by_genome
(
all_genomes
,
all_alns_nucl
,
status_nucl
,
outfile_nucl
,
dname
,
"
nucleic
"
,
quiet
)
if
not
res_nucl
:
if
not
res_nucl
:
utils
.
remove
(
all_alns_nucl
)
utils
.
remove
(
all_alns_nucl
)
...
@@ -87,7 +87,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
...
@@ -87,7 +87,7 @@ def post_alignment(fam_nums, all_genomes, prefix, outdir, dname, prot_ali, quiet
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
prot_ali
:
if
prot_ali
:
all_alns_aa
,
status_aa
=
concat_alignments
(
fam_nums
,
prefix
,
"
aa
"
,
quiet
)
all_alns_aa
,
status_aa
=
concat_alignments
(
fam_nums
,
prefix
,
"
aa
"
,
quiet
)
outfile_aa
=
os
.
path
.
join
(
treedir
,
dname
+
"
.grp
.aa
.aln
"
)
outfile_aa
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
aa.
grp.aln
"
)
res_aa
=
launch_group_by_genome
(
all_genomes
,
all_alns_aa
,
status_aa
,
outfile_aa
,
dname
,
"
protein
"
,
quiet
)
res_aa
=
launch_group_by_genome
(
all_genomes
,
all_alns_aa
,
status_aa
,
outfile_aa
,
dname
,
"
protein
"
,
quiet
)
if
not
res_aa
:
if
not
res_aa
:
utils
.
remove
(
all_alns_aa
)
utils
.
remove
(
all_alns_aa
)
...
@@ -104,8 +104,6 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
...
@@ -104,8 +104,6 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
----------
----------
fam_nums : []
fam_nums : []
list of family numbers
list of family numbers
ali_type: str
nucl or aa
prefix : str
prefix : str
path to ``aldir/<name of dataset>-[mafft-align or mafft-prt2nuc]``
path to ``aldir/<name of dataset>-[mafft-align or mafft-prt2nuc]``
(used to get extraction, alignment and btr files easily)
(used to get extraction, alignment and btr files easily)
...
@@ -129,7 +127,7 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
...
@@ -129,7 +127,7 @@ def concat_alignments(fam_nums, prefix, ali_type, quiet):
else
:
else
:
logger
.
error
(
f
"
Not possible to concatenate
'
{
ali_type
}
'
type of alignments.
"
)
logger
.
error
(
f
"
Not possible to concatenate
'
{
ali_type
}
'
type of alignments.
"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
output
=
f
"
{
prefix
}
-complete.
cat.
{
ali_type
}
.aln
"
output
=
f
"
{
prefix
}
-complete.
{
ali_type
}
.
cat.
aln
"
if
os
.
path
.
isfile
(
output
):
if
os
.
path
.
isfile
(
output
):
logger
.
info
(
f
"
{
ali_type
}
alignments already concatenated
"
)
logger
.
info
(
f
"
{
ali_type
}
alignments already concatenated
"
)
logger
.
warning
(
f
"
{
ali_type
}
alignments already concatenated in
{
output
}
. Program will use
"
logger
.
warning
(
f
"
{
ali_type
}
alignments already concatenated in
{
output
}
. Program will use
"
...
@@ -169,7 +167,7 @@ def launch_group_by_genome(all_genomes, all_alns, status, outfile, dname, type_a
...
@@ -169,7 +167,7 @@ def launch_group_by_genome(all_genomes, all_alns, status, outfile, dname, type_a
dname : str
dname : str
name of dataset
name of dataset
type_ali : str
type_ali : str
nucleic or
aa
nucleic or
protein
quiet : bool
quiet : bool
True if nothing must be sent to sdtout/stderr, False otherwise
True if nothing must be sent to sdtout/stderr, False otherwise
...
@@ -232,7 +230,6 @@ def group_by_genome(args):
...
@@ -232,7 +230,6 @@ def group_by_genome(args):
"""
"""
all_genomes
,
all_alns
,
outfile
=
args
all_genomes
,
all_alns
,
outfile
=
args
sequences
=
read_alignments
(
all_alns
,
all_genomes
)
sequences
=
read_alignments
(
all_alns
,
all_genomes
)
logger
.
info
(
sequences
)
if
not
sequences
:
if
not
sequences
:
return
False
return
False
write_groups
(
outfile
,
sequences
)
write_groups
(
outfile
,
sequences
)
...
...
This diff is collapsed.
Click to expand it.
test/test_unit/test_align/test_postalign.py
+
15
−
15
View file @
aa0a86b6
...
@@ -298,7 +298,7 @@ def test_concat_nucl(caplog):
...
@@ -298,7 +298,7 @@ def test_concat_nucl(caplog):
fam_nums
=
[
1
,
8
,
11
]
fam_nums
=
[
1
,
8
,
11
]
quiet
=
False
quiet
=
False
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
cat.
nucl.aln
"
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.nucl
.cat
.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
mess
==
"
Done
"
assert
mess
==
"
Done
"
...
@@ -330,7 +330,7 @@ def test_concat_aa(caplog):
...
@@ -330,7 +330,7 @@ def test_concat_aa(caplog):
fam_nums
=
[
1
,
8
,
11
]
fam_nums
=
[
1
,
8
,
11
]
quiet
=
False
quiet
=
False
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
aa
"
,
quiet
)
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
aa
"
,
quiet
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.cat
.aa
.aln
"
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
aa.
cat.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aa.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aa.aln
"
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
mess
==
"
Done
"
assert
mess
==
"
Done
"
...
@@ -362,7 +362,7 @@ def test_concat_quiet(caplog):
...
@@ -362,7 +362,7 @@ def test_concat_quiet(caplog):
fam_nums
=
[
1
,
8
,
11
]
fam_nums
=
[
1
,
8
,
11
]
quiet
=
True
quiet
=
True
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
output
,
mess
=
pal
.
concat_alignments
(
fam_nums
,
prefix
,
"
nucl
"
,
quiet
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
cat.
nucl.aln
"
)
assert
output
==
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.nucl
.cat
.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
ref_concat
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
tutil
.
compare_order_content
(
output
,
ref_concat
)
assert
mess
==
"
Done
"
assert
mess
==
"
Done
"
...
@@ -422,7 +422,7 @@ def test_concat_outexists(caplog):
...
@@ -422,7 +422,7 @@ def test_concat_outexists(caplog):
shutil
.
copyfile
(
orig_btr8
,
btr8
)
shutil
.
copyfile
(
orig_btr8
,
btr8
)
shutil
.
copyfile
(
orig_btr11
,
btr11
)
shutil
.
copyfile
(
orig_btr11
,
btr11
)
# Create empty concatenated file
# Create empty concatenated file
outempty
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.cat
.aa
.aln
"
)
outempty
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
aa.
cat.aln
"
)
open
(
outempty
,
"
w
"
).
close
()
open
(
outempty
,
"
w
"
).
close
()
# Other parameters, and run concatenation
# Other parameters, and run concatenation
fam_nums
=
[
1
,
8
,
11
]
fam_nums
=
[
1
,
8
,
11
]
...
@@ -435,7 +435,7 @@ def test_concat_outexists(caplog):
...
@@ -435,7 +435,7 @@ def test_concat_outexists(caplog):
assert
"
aa alignments already concatenated
"
in
caplog
.
text
assert
"
aa alignments already concatenated
"
in
caplog
.
text
assert
(
"
aa alignments already concatenated in
"
assert
(
"
aa alignments already concatenated in
"
"
test/data/align/generated_by_unit-tests/test_concat_aldir/
"
"
test/data/align/generated_by_unit-tests/test_concat_aldir/
"
"
TESTconcat-complete.cat
.aa
.aln.
"
"
TESTconcat-complete.
aa.
cat.aln.
"
"
Program will use it for next steps
"
)
in
caplog
.
text
"
Program will use it for next steps
"
)
in
caplog
.
text
...
@@ -492,24 +492,24 @@ def test_postalign(caplog):
...
@@ -492,24 +492,24 @@ def test_postalign(caplog):
# print(caplog.text)
# print(caplog.text)
# CHECK CONCAT
# CHECK CONCAT
# Check that concatenated file in nucl is created and with expected content
# Check that concatenated file in nucl is created and with expected content
out_concat_nucl
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
cat.
nucl.aln
"
)
out_concat_nucl
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.nucl
.cat
.aln
"
)
assert
os
.
path
.
isfile
(
out_concat_nucl
)
assert
os
.
path
.
isfile
(
out_concat_nucl
)
ref_concat_nucl
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
ref_concat_nucl
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
assert
tutil
.
compare_order_content
(
out_concat_nucl
,
ref_concat_nucl
)
assert
tutil
.
compare_order_content
(
out_concat_nucl
,
ref_concat_nucl
)
# Check concatenated in aa
# Check concatenated in aa
out_concat_aa
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.cat
.aa
.aln
"
)
out_concat_aa
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
aa.
cat.aln
"
)
assert
os
.
path
.
isfile
(
out_concat_aa
)
assert
os
.
path
.
isfile
(
out_concat_aa
)
ref_concat_aa
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aa.aln
"
)
ref_concat_aa
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aa.aln
"
)
assert
tutil
.
compare_order_content
(
out_concat_aa
,
ref_concat_aa
)
assert
tutil
.
compare_order_content
(
out_concat_aa
,
ref_concat_aa
)
# CHECK GROUPED
# CHECK GROUPED
# Check that grouped by genome file in nucleotides is created, with expected content
# Check that grouped by genome file in nucleotides is created, with expected content
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
grp.
nucl.aln
"
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.nucl
.grp
.aln
"
)
assert
os
.
path
.
isfile
(
out_grp
)
assert
os
.
path
.
isfile
(
out_grp
)
exp_grp
=
os
.
path
.
join
(
EXPPATH
,
"
exp_grp_4genomes-fam1-8-11.aln
"
)
exp_grp
=
os
.
path
.
join
(
EXPPATH
,
"
exp_grp_4genomes-fam1-8-11.aln
"
)
assert
tutil
.
compare_order_content
(
out_grp
,
exp_grp
)
assert
tutil
.
compare_order_content
(
out_grp
,
exp_grp
)
# Check aa alignment grouped by genome
# Check aa alignment grouped by genome
out_grp_aa
=
os
.
path
.
join
(
treedir
,
dname
+
"
.grp
.aa
.aln
"
)
out_grp_aa
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
aa.
grp.aln
"
)
assert
os
.
path
.
isfile
(
out_grp_aa
)
assert
os
.
path
.
isfile
(
out_grp_aa
)
exp_grp_aa
=
os
.
path
.
join
(
EXPPATH
,
"
exp_grp_4genomes-fam1-8-11.aa.aln
"
)
exp_grp_aa
=
os
.
path
.
join
(
EXPPATH
,
"
exp_grp_4genomes-fam1-8-11.aa.aln
"
)
assert
tutil
.
compare_order_content
(
out_grp_aa
,
exp_grp_aa
)
assert
tutil
.
compare_order_content
(
out_grp_aa
,
exp_grp_aa
)
...
@@ -588,11 +588,11 @@ def test_postalign_missgenome(caplog):
...
@@ -588,11 +588,11 @@ def test_postalign_missgenome(caplog):
with
pytest
.
raises
(
SystemExit
):
with
pytest
.
raises
(
SystemExit
):
pal
.
post_alignment
(
fam_nums
,
all_genomes
,
prefix
,
outdir
,
dname
,
prot_ali
,
quiet
)
pal
.
post_alignment
(
fam_nums
,
all_genomes
,
prefix
,
outdir
,
dname
,
prot_ali
,
quiet
)
# Check that concatenated file is created and with expected content
# Check that concatenated file is created and with expected content
out_concat
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
cat.
nucl.aln
"
)
out_concat
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.nucl
.cat
.aln
"
)
assert
not
os
.
path
.
isfile
(
out_concat
)
assert
not
os
.
path
.
isfile
(
out_concat
)
# Check that grouped by genome file is not created
# Check that grouped by genome file is not created
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
grp.
nucl.aln
"
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.nucl
.grp
.aln
"
)
assert
not
os
.
path
.
isfile
(
out_grp
)
assert
not
os
.
path
.
isfile
(
out_grp
)
# check logs
# check logs
assert
"
Concatenating all nucl alignment files
"
in
caplog
.
text
assert
"
Concatenating all nucl alignment files
"
in
caplog
.
text
...
@@ -631,7 +631,7 @@ def test_postalign_error_grpaa(caplog):
...
@@ -631,7 +631,7 @@ def test_postalign_error_grpaa(caplog):
btr11
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-prt2nuc.11.aln
"
)
btr11
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-prt2nuc.11.aln
"
)
ali1
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-align.1.aln
"
)
ali1
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-align.1.aln
"
)
ali11
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-align.11.aln
"
)
ali11
=
os
.
path
.
join
(
aldir
,
dname
+
"
-mafft-align.11.aln
"
)
concataa
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.cat
.aa
.aln
"
)
concataa
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
aa.
cat.aln
"
)
shutil
.
copyfile
(
orig_btr1
,
btr1
)
shutil
.
copyfile
(
orig_btr1
,
btr1
)
shutil
.
copyfile
(
orig_btr8
,
btr8
)
shutil
.
copyfile
(
orig_btr8
,
btr8
)
shutil
.
copyfile
(
orig_btr11
,
btr11
)
shutil
.
copyfile
(
orig_btr11
,
btr11
)
...
@@ -639,9 +639,9 @@ def test_postalign_error_grpaa(caplog):
...
@@ -639,9 +639,9 @@ def test_postalign_error_grpaa(caplog):
shutil
.
copyfile
(
orig_ali11
,
ali11
)
shutil
.
copyfile
(
orig_ali11
,
ali11
)
shutil
.
copyfile
(
orig_concat_aa
,
concataa
)
shutil
.
copyfile
(
orig_concat_aa
,
concataa
)
# Run post-alignment
# Run post-alignment
out_concat
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.
cat.
nucl.aln
"
)
out_concat
=
os
.
path
.
join
(
aldir
,
dname
+
"
-complete.nucl
.cat
.aln
"
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
treedir
=
os
.
path
.
join
(
outdir
,
"
Phylo-
"
+
dname
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.
grp.
nucl.aln
"
)
out_grp
=
os
.
path
.
join
(
treedir
,
dname
+
"
.nucl
.grp
.aln
"
)
assert
pal
.
post_alignment
(
fam_nums
,
all_genomes
,
prefix
,
outdir
,
dname
,
prot_ali
,
quiet
)
==
out_grp
assert
pal
.
post_alignment
(
fam_nums
,
all_genomes
,
prefix
,
outdir
,
dname
,
prot_ali
,
quiet
)
==
out_grp
# Check that concatenated file is created and with expected content
# Check that concatenated file is created and with expected content
ref_concat_nucl
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
ref_concat_nucl
=
os
.
path
.
join
(
EXPPATH
,
"
exp_concat_4genomes-fam1-8-11.aln
"
)
...
@@ -655,7 +655,7 @@ def test_postalign_error_grpaa(caplog):
...
@@ -655,7 +655,7 @@ def test_postalign_error_grpaa(caplog):
assert
"
Concatenating all nucl alignment files
"
in
caplog
.
text
assert
"
Concatenating all nucl alignment files
"
in
caplog
.
text
assert
"
Grouping nucleic alignments per genome
"
in
caplog
.
text
assert
"
Grouping nucleic alignments per genome
"
in
caplog
.
text
assert
(
"
aa alignments already concatenated in test/data/align/generated_by_unit-tests/
"
assert
(
"
aa alignments already concatenated in test/data/align/generated_by_unit-tests/
"
"
test_post-align/aldir_post-align/TESTpost-complete.cat
.aa
.aln.
"
"
test_post-align/aldir_post-align/TESTpost-complete.
aa.
cat.aln.
"
"
Program will use it for next steps. If you want to redo it,
"
"
Program will use it for next steps. If you want to redo it,
"
"
remove it before running.
"
)
in
caplog
.
text
"
remove it before running.
"
)
in
caplog
.
text
assert
"
Grouping protein alignments per genome
"
in
caplog
.
text
assert
"
Grouping protein alignments per genome
"
in
caplog
.
text
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment