Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
859ba01e
Commit
859ba01e
authored
7 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
Complete functional tests for pangenome submodule
parent
7677d952
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
test/test_functional/test_pangenome.py
+330
-6
330 additions, 6 deletions
test/test_functional/test_pangenome.py
with
330 additions
and
6 deletions
test/test_functional/test_pangenome.py
+
330
−
6
View file @
859ba01e
...
...
@@ -3,9 +3,10 @@
"""
Functional tests for genomeAPCAT pangenome
"""
import
glob
import
os
import
pyt
es
t
import
shutil
import
subproc
es
s
import
genomeAPCAT.subcommands.pangenome
as
pan
from
genomeAPCAT
import
utils
...
...
@@ -14,6 +15,7 @@ from genomeAPCAT import utils
LOGFILE_BASE
=
"
func_test_pangenome
"
TEST_FILES
=
os
.
path
.
join
(
"
test
"
,
"
data
"
,
"
pangenome
"
,
"
test_files
"
)
DBPATH
=
os
.
path
.
join
(
TEST_FILES
,
"
example_db
"
,
"
Proteins
"
)
PATH_EXP_FILES
=
os
.
path
.
join
(
"
test
"
,
"
data
"
,
"
pangenome
"
,
"
exp_files
"
)
def
setup_module
():
...
...
@@ -21,20 +23,23 @@ def setup_module():
create logger at start of this test module
"""
utils
.
init_logger
(
LOGFILE_BASE
,
0
,
''
,
verbose
=
1
)
print
(
"
Createc logger
"
)
def
teardown_module
(
module
):
def
teardown_module
():
"""
Remove log files at the end of this test module
"""
os
.
remove
(
LOGFILE_BASE
+
"
.log
"
)
os
.
remove
(
LOGFILE_BASE
+
"
.log.details
"
)
os
.
remove
(
LOGFILE_BASE
+
"
.log.err
"
)
print
(
"
Remove log files
"
)
def
test_main
(
caplog
):
"""
Test
Test that from empty directory, it creates all expected files and
returns correct logs
"""
lstinfo
=
os
.
path
.
join
(
TEST_FILES
,
"
list_to_pan.txt
"
)
name
=
"
testPAN4
"
...
...
@@ -49,5 +54,324 @@ def test_main(caplog):
assert
os
.
path
.
isfile
(
prtbank
)
os
.
remove
(
prtbank
)
assert
os
.
path
.
isdir
(
outdir
)
#check outdir content (presence of mmseq db, mmseq clust, tmp folder,
#families found in pangenome.)
# Check presence of mmseq DB files
msdb
=
os
.
path
.
join
(
outdir
,
"
testPAN4.All.prt-msDB
"
)
assert
os
.
path
.
isfile
(
msdb
)
assert
os
.
path
.
isfile
(
msdb
+
"
.index
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
.lookup
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h.index
"
)
# Check presence of mmseq cluster files
cluster
=
os
.
path
.
join
(
outdir
,
"
testPAN4.All.prt-clust-0.8-mode1_*
"
)
clust_files
=
glob
.
glob
(
cluster
)
assert
len
(
clust_files
)
==
3
clust_base
=
[
cl
for
cl
in
clust_files
if
not
cl
.
endswith
(
"
.index
"
)
and
not
cl
.
endswith
(
"
.tsv
"
)][
0
]
assert
os
.
path
.
isfile
(
clust_base
+
"
.index
"
)
assert
os
.
path
.
isfile
(
clust_base
+
"
.tsv
"
)
assert
os
.
path
.
isfile
(
clust_base
)
# Check presence of tmp folder
tmp_base
=
os
.
path
.
join
(
outdir
,
"
tmp_testPAN4.All.prt_0.8-mode1_*
"
)
assert
len
(
glob
.
glob
(
tmp_base
))
==
1
# Check presence of pangenome files (pangenome, matrices, summary)
pan_base
=
os
.
path
.
join
(
outdir
,
"
PanGenome-{}.tsv.lst
"
.
format
(
os
.
path
.
basename
(
clust_base
)))
assert
os
.
path
.
isfile
(
pan_base
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quali.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quanti.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.summary.txt
"
)
# Check content of pangenome
exp_pan
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_pangenome-4genomes.lst
"
)
with
open
(
exp_pan
,
"
r
"
)
as
ep
,
open
(
pan_base
,
"
r
"
)
as
panf
:
lines_exp
=
[]
lines_out
=
[]
for
line_exp
in
ep
:
lines_exp
.
append
(
"
"
.
join
(
line_exp
.
split
()[
1
:]))
for
line_out
in
panf
:
lines_out
.
append
(
"
"
.
join
(
line_out
.
split
()[
1
:]))
assert
len
(
lines_exp
)
==
len
(
lines_out
)
assert
set
(
lines_exp
)
==
set
(
lines_out
)
# Check log content
assert
"
Building bank with all proteins to testPAN4.All.prt
"
in
caplog
.
text
assert
"
Creating database
"
in
caplog
.
text
assert
"
Clustering proteins...
"
in
caplog
.
text
assert
"
Converting mmseqs results to pangenome file
"
in
caplog
.
text
assert
"
Pangenome has 16 families
"
in
caplog
.
text
assert
"
Retrieving information from pan families
"
in
caplog
.
text
assert
"
Saving all information to a binary file for later use
"
in
caplog
.
text
assert
"
Generating qualitative and quantitative matrix, and summary file
"
in
caplog
.
text
# Remove output directory
shutil
.
rmtree
(
outdir
)
def
test_main_prt_exist
(
caplog
):
"""
Test that when the prt bank already exists, it writes it to the logs, and
continues using it.
"""
lstinfo
=
os
.
path
.
join
(
TEST_FILES
,
"
list_to_pan.txt
"
)
name
=
"
test2PAN4
"
min_id
=
0.8
outdir
=
"
test_prtExist_main_pangenome_dir
"
clust_mode
=
1
spe_dir
=
None
threads
=
1
# Copy prt bank to database folder, so that it is not created again
src_prt_bank
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_EXEM.All.prt
"
)
dest_prt_bank
=
os
.
path
.
join
(
DBPATH
,
"
test2PAN4.All.prt
"
)
shutil
.
copyfile
(
src_prt_bank
,
dest_prt_bank
)
pan
.
main
(
lstinfo
,
name
,
DBPATH
,
min_id
,
outdir
,
clust_mode
,
spe_dir
,
threads
)
os
.
remove
(
dest_prt_bank
)
assert
os
.
path
.
isdir
(
outdir
)
# Check presence of mmseq DB files
msdb
=
os
.
path
.
join
(
outdir
,
"
test2PAN4.All.prt-msDB
"
)
assert
os
.
path
.
isfile
(
msdb
)
assert
os
.
path
.
isfile
(
msdb
+
"
.index
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
.lookup
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h.index
"
)
# Check presence of mmseq cluster files
cluster
=
os
.
path
.
join
(
outdir
,
"
test2PAN4.All.prt-clust-0.8-mode1_*
"
)
clust_files
=
glob
.
glob
(
cluster
)
assert
len
(
clust_files
)
==
3
clust_base
=
[
cl
for
cl
in
clust_files
if
not
cl
.
endswith
(
"
.index
"
)
and
not
cl
.
endswith
(
"
.tsv
"
)][
0
]
assert
os
.
path
.
isfile
(
clust_base
+
"
.index
"
)
assert
os
.
path
.
isfile
(
clust_base
+
"
.tsv
"
)
assert
os
.
path
.
isfile
(
clust_base
)
# Check presence of tmp folder
tmp_base
=
os
.
path
.
join
(
outdir
,
"
tmp_test2PAN4.All.prt_0.8-mode1_*
"
)
assert
len
(
glob
.
glob
(
tmp_base
))
==
1
# Check presence of pangenome files (pangenome, matrices, summary)
pan_base
=
os
.
path
.
join
(
outdir
,
"
PanGenome-{}.tsv.lst
"
.
format
(
os
.
path
.
basename
(
clust_base
)))
assert
os
.
path
.
isfile
(
pan_base
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quali.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quanti.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.summary.txt
"
)
# Check content of pangenome
exp_pan
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_pangenome-4genomes.lst
"
)
with
open
(
exp_pan
,
"
r
"
)
as
ep
,
open
(
pan_base
,
"
r
"
)
as
panf
:
lines_exp
=
[]
lines_out
=
[]
for
line_exp
in
ep
:
lines_exp
.
append
(
"
"
.
join
(
line_exp
.
split
()[
1
:]))
for
line_out
in
panf
:
lines_out
.
append
(
"
"
.
join
(
line_out
.
split
()[
1
:]))
assert
len
(
lines_exp
)
==
len
(
lines_out
)
assert
set
(
lines_exp
)
==
set
(
lines_out
)
# Check log content
assert
(
"
Protein bank test/data/pangenome/test_files/example_db/
"
"
Proteins/test2PAN4.All.prt already exists. It will
"
"
be used by mmseqs.
"
)
in
caplog
.
text
assert
"
Creating database
"
in
caplog
.
text
assert
"
Clustering proteins...
"
in
caplog
.
text
assert
"
Converting mmseqs results to pangenome file
"
in
caplog
.
text
assert
"
Pangenome has 16 families
"
in
caplog
.
text
assert
"
Retrieving information from pan families
"
in
caplog
.
text
assert
"
Saving all information to a binary file for later use
"
in
caplog
.
text
assert
"
Generating qualitative and quantitative matrix, and summary file
"
in
caplog
.
text
# Remove output directory
shutil
.
rmtree
(
outdir
)
def
test_main_spedir
(
caplog
):
"""
Test that from empty directory, it creates all expected files and
returns correct logs
"""
lstinfo
=
os
.
path
.
join
(
TEST_FILES
,
"
list_to_pan.txt
"
)
name
=
"
test3PAN4
"
min_id
=
0.8
outdir
=
"
test_spedir_main_pangenome_dir
"
clust_mode
=
1
spe_dir
=
outdir
+
"
-spe
"
threads
=
1
pan
.
main
(
lstinfo
,
name
,
DBPATH
,
min_id
,
outdir
,
clust_mode
,
spe_dir
,
threads
)
prtbank
=
os
.
path
.
join
(
spe_dir
,
"
test3PAN4.All.prt
"
)
assert
os
.
path
.
isfile
(
prtbank
)
shutil
.
rmtree
(
spe_dir
)
assert
os
.
path
.
isdir
(
outdir
)
# Check presence of mmseq DB files
msdb
=
os
.
path
.
join
(
outdir
,
"
test3PAN4.All.prt-msDB
"
)
assert
os
.
path
.
isfile
(
msdb
)
assert
os
.
path
.
isfile
(
msdb
+
"
.index
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
.lookup
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h.index
"
)
# Check presence of mmseq cluster files
cluster
=
os
.
path
.
join
(
outdir
,
"
test3PAN4.All.prt-clust-0.8-mode1_*
"
)
clust_files
=
glob
.
glob
(
cluster
)
assert
len
(
clust_files
)
==
3
clust_base
=
[
cl
for
cl
in
clust_files
if
not
cl
.
endswith
(
"
.index
"
)
and
not
cl
.
endswith
(
"
.tsv
"
)][
0
]
assert
os
.
path
.
isfile
(
clust_base
+
"
.index
"
)
assert
os
.
path
.
isfile
(
clust_base
+
"
.tsv
"
)
assert
os
.
path
.
isfile
(
clust_base
)
# Check presence of tmp folder
tmp_base
=
os
.
path
.
join
(
outdir
,
"
tmp_test3PAN4.All.prt_0.8-mode1_*
"
)
assert
len
(
glob
.
glob
(
tmp_base
))
==
1
# Check presence of pangenome files (pangenome, matrices, summary)
pan_base
=
os
.
path
.
join
(
outdir
,
"
PanGenome-{}.tsv.lst
"
.
format
(
os
.
path
.
basename
(
clust_base
)))
assert
os
.
path
.
isfile
(
pan_base
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quali.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quanti.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.summary.txt
"
)
# Check content of pangenome
exp_pan
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_pangenome-4genomes.lst
"
)
with
open
(
exp_pan
,
"
r
"
)
as
ep
,
open
(
pan_base
,
"
r
"
)
as
panf
:
lines_exp
=
[]
lines_out
=
[]
for
line_exp
in
ep
:
lines_exp
.
append
(
"
"
.
join
(
line_exp
.
split
()[
1
:]))
for
line_out
in
panf
:
lines_out
.
append
(
"
"
.
join
(
line_out
.
split
()[
1
:]))
assert
len
(
lines_exp
)
==
len
(
lines_out
)
assert
set
(
lines_exp
)
==
set
(
lines_out
)
# Check log content
assert
"
Building bank with all proteins to test3PAN4.All.prt
"
in
caplog
.
text
assert
"
Creating database
"
in
caplog
.
text
assert
"
Clustering proteins...
"
in
caplog
.
text
assert
"
Converting mmseqs results to pangenome file
"
in
caplog
.
text
assert
"
Pangenome has 16 families
"
in
caplog
.
text
assert
"
Retrieving information from pan families
"
in
caplog
.
text
assert
"
Saving all information to a binary file for later use
"
in
caplog
.
text
assert
"
Generating qualitative and quantitative matrix, and summary file
"
in
caplog
.
text
# Remove output directory
shutil
.
rmtree
(
outdir
)
def
test_main_outfile
(
caplog
):
"""
Test that from empty directory, it creates all expected files and
returns correct logs
"""
lstinfo
=
os
.
path
.
join
(
TEST_FILES
,
"
list_to_pan.txt
"
)
name
=
"
test4PAN4
"
min_id
=
0.8
outdir
=
"
test_outfile_main_pangenome_dir
"
clust_mode
=
1
spe_dir
=
outdir
+
"
-spe
"
threads
=
1
outfile
=
"
mypangenome.txt
"
pan
.
main
(
lstinfo
,
name
,
DBPATH
,
min_id
,
outdir
,
clust_mode
,
spe_dir
,
threads
,
outfile
=
outfile
)
prtbank
=
os
.
path
.
join
(
spe_dir
,
"
test4PAN4.All.prt
"
)
assert
os
.
path
.
isfile
(
prtbank
)
shutil
.
rmtree
(
spe_dir
)
assert
os
.
path
.
isdir
(
outdir
)
# Check presence of mmseq DB files
msdb
=
os
.
path
.
join
(
outdir
,
"
test4PAN4.All.prt-msDB
"
)
assert
os
.
path
.
isfile
(
msdb
)
assert
os
.
path
.
isfile
(
msdb
+
"
.index
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
.lookup
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h.index
"
)
# Check presence of mmseq cluster files
cluster
=
os
.
path
.
join
(
outdir
,
"
test4PAN4.All.prt-clust-0.8-mode1_*
"
)
clust_files
=
glob
.
glob
(
cluster
)
assert
len
(
clust_files
)
==
3
clust_base
=
[
cl
for
cl
in
clust_files
if
not
cl
.
endswith
(
"
.index
"
)
and
not
cl
.
endswith
(
"
.tsv
"
)][
0
]
assert
os
.
path
.
isfile
(
clust_base
+
"
.index
"
)
assert
os
.
path
.
isfile
(
clust_base
+
"
.tsv
"
)
assert
os
.
path
.
isfile
(
clust_base
)
# Check presence of tmp folder
tmp_base
=
os
.
path
.
join
(
outdir
,
"
tmp_test4PAN4.All.prt_0.8-mode1_*
"
)
assert
len
(
glob
.
glob
(
tmp_base
))
==
1
# Check presence of pangenome files (pangenome, matrices, summary)
outf
=
os
.
path
.
join
(
outdir
,
outfile
)
assert
os
.
path
.
isfile
(
outf
)
assert
os
.
path
.
isfile
(
outf
+
"
.quali.txt
"
)
assert
os
.
path
.
isfile
(
outf
+
"
.quanti.txt
"
)
assert
os
.
path
.
isfile
(
outf
+
"
.summary.txt
"
)
# Check content of pangenome
exp_pan
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_pangenome-4genomes.lst
"
)
with
open
(
exp_pan
,
"
r
"
)
as
ep
,
open
(
outf
,
"
r
"
)
as
panf
:
lines_exp
=
[]
lines_out
=
[]
for
line_exp
in
ep
:
lines_exp
.
append
(
"
"
.
join
(
line_exp
.
split
()[
1
:]))
for
line_out
in
panf
:
lines_out
.
append
(
"
"
.
join
(
line_out
.
split
()[
1
:]))
assert
len
(
lines_exp
)
==
len
(
lines_out
)
assert
set
(
lines_exp
)
==
set
(
lines_out
)
# Check log content
assert
"
Building bank with all proteins to test4PAN4.All.prt
"
in
caplog
.
text
assert
"
Creating database
"
in
caplog
.
text
assert
"
Clustering proteins...
"
in
caplog
.
text
assert
"
Converting mmseqs results to pangenome file
"
in
caplog
.
text
assert
"
Pangenome has 16 families
"
in
caplog
.
text
assert
"
Retrieving information from pan families
"
in
caplog
.
text
assert
"
Saving all information to a binary file for later use
"
in
caplog
.
text
assert
"
Generating qualitative and quantitative matrix, and summary file
"
in
caplog
.
text
# Remove output directory
shutil
.
rmtree
(
outdir
)
def
test_pangenome_all
():
"""
Test when calling pangenome from command line, it runs and gives expected output files
"""
lstinfo
=
os
.
path
.
join
(
TEST_FILES
,
"
list_to_pan.txt
"
)
name
=
"
testAllPAN4
"
min_id
=
0.8
outdir
=
"
test_all_main_pangenome_dir
"
cmd
=
"
genomeAPCAT pangenome -l {} -n {} -d {} -i {} -o {}
"
.
format
(
lstinfo
,
name
,
DBPATH
,
min_id
,
outdir
)
ret
=
subprocess
.
call
(
cmd
.
split
())
assert
ret
==
0
prtbank
=
os
.
path
.
join
(
DBPATH
,
"
testAllPAN4.All.prt
"
)
assert
os
.
path
.
isfile
(
prtbank
)
os
.
remove
(
prtbank
)
assert
os
.
path
.
isdir
(
outdir
)
# Check presence of mmseq DB files
msdb
=
os
.
path
.
join
(
outdir
,
"
testAllPAN4.All.prt-msDB
"
)
assert
os
.
path
.
isfile
(
msdb
)
assert
os
.
path
.
isfile
(
msdb
+
"
.index
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
.lookup
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h
"
)
assert
os
.
path
.
isfile
(
msdb
+
"
_h.index
"
)
# Check presence of mmseq cluster files
cluster
=
os
.
path
.
join
(
outdir
,
"
testAllPAN4.All.prt-clust-0.8-mode1_*
"
)
clust_files
=
glob
.
glob
(
cluster
)
assert
len
(
clust_files
)
==
3
clust_base
=
[
cl
for
cl
in
clust_files
if
not
cl
.
endswith
(
"
.index
"
)
and
not
cl
.
endswith
(
"
.tsv
"
)][
0
]
assert
os
.
path
.
isfile
(
clust_base
+
"
.index
"
)
assert
os
.
path
.
isfile
(
clust_base
+
"
.tsv
"
)
assert
os
.
path
.
isfile
(
clust_base
)
# Check presence of tmp folder
tmp_base
=
os
.
path
.
join
(
outdir
,
"
tmp_testAllPAN4.All.prt_0.8-mode1_*
"
)
assert
len
(
glob
.
glob
(
tmp_base
))
==
1
# Check presence of pangenome files (pangenome, matrices, summary)
pan_base
=
os
.
path
.
join
(
outdir
,
"
PanGenome-{}.tsv.lst
"
.
format
(
os
.
path
.
basename
(
clust_base
)))
assert
os
.
path
.
isfile
(
pan_base
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quali.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.quanti.txt
"
)
assert
os
.
path
.
isfile
(
pan_base
+
"
.summary.txt
"
)
# Check content of pangenome
exp_pan
=
os
.
path
.
join
(
PATH_EXP_FILES
,
"
exp_pangenome-4genomes.lst
"
)
with
open
(
exp_pan
,
"
r
"
)
as
ep
,
open
(
pan_base
,
"
r
"
)
as
panf
:
lines_exp
=
[]
lines_out
=
[]
for
line_exp
in
ep
:
lines_exp
.
append
(
"
"
.
join
(
line_exp
.
split
()[
1
:]))
for
line_out
in
panf
:
lines_out
.
append
(
"
"
.
join
(
line_out
.
split
()[
1
:]))
assert
len
(
lines_exp
)
==
len
(
lines_out
)
assert
set
(
lines_exp
)
==
set
(
lines_out
)
# Check presence of log files, and that .err is empty
log_base
=
os
.
path
.
join
(
outdir
,
"
genomeAPCAT-pangenome_testAllPAN4.log
"
)
assert
os
.
path
.
isfile
(
log_base
)
assert
os
.
path
.
isfile
(
log_base
+
"
.details
"
)
assert
os
.
path
.
isfile
(
log_base
+
"
.err
"
)
with
open
(
log_base
+
"
.err
"
)
as
errf
:
lines
=
errf
.
readlines
()
assert
lines
==
[]
# Remove output directory
shutil
.
rmtree
(
outdir
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment