Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
f5469400
Commit
f5469400
authored
3 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
Add NCBI_strain feature to prepare, + tests for parser
parent
d17c14df
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
PanACoTA/subcommands/prepare.py
+26
-12
26 additions, 12 deletions
PanACoTA/subcommands/prepare.py
test/test_functional/test_prepare-parser.py
+31
-6
31 additions, 6 deletions
test/test_functional/test_prepare-parser.py
with
57 additions
and
18 deletions
PanACoTA/subcommands/prepare.py
+
26
−
12
View file @
f5469400
...
...
@@ -73,7 +73,7 @@ def main_from_parse(arguments):
arguments
.
max_dist
,
arguments
.
verbose
,
arguments
.
quiet
)
def
main
(
cmd
,
ncbi_species_name
,
ncbi_species_taxid
,
ncbi_taxid
,
strains
,
levels
,
ncbi_section
,
def
main
(
cmd
,
ncbi_species_name
,
ncbi_species_taxid
,
ncbi_taxid
,
ncbi_
strains
,
levels
,
ncbi_section
,
outdir
,
tmp_dir
,
threads
,
norefseq
,
db_dir
,
only_mash
,
info_file
,
l90
,
nbcont
,
cutn
,
min_dist
,
max_dist
,
verbose
,
quiet
):
"""
...
...
@@ -95,9 +95,9 @@ def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, strains, levels
ncbi_species_taxid : int
species taxid given in NCBI
ncbi_taxid : int
NCBI taxid
of strain
strains : str
strains to download
NCBI taxid
(sub-species)
ncbi_
strains : str
specific
strains to download
levels: str
Level of assembly to download. Choice between
'
all
'
,
'
complete
'
,
'
chromosome
'
,
'
scaffold
'
,
'
contig
'
. Default is
'
all
'
...
...
@@ -151,7 +151,14 @@ def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, strains, levels
# if species name not species taxid by user, use taxID (if given) to name output directory
elif
ncbi_taxid
:
species_linked
=
str
(
ncbi_taxid
)
# if neither speName, speID nor taxID given (--norefseq, mashonly), name is NA
# If no species nor taxID, get specific strain names
elif
ncbi_strains
:
if
os
.
path
.
isfile
(
ncbi_strains
):
species_linked
=
os
.
path
.
basename
(
ncbi_strains
)
else
:
species_linked
=
"
_
"
.
join
(
ncbi_strains
.
split
())
species_linked
=
"
-
"
.
join
(
ncbi_strains
.
split
(
"
/
"
))
# if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA
else
:
species_linked
=
"
NA
"
# Default outdir is species name if given, or species taxID
...
...
@@ -244,7 +251,7 @@ def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, strains, levels
else
:
# Download all genomes of the given taxID
db_dir
,
nb_gen
=
dgf
.
download_from_ncbi
(
species_linked
,
ncbi_section
,
ncbi_species_name
,
ncbi_species_taxid
,
ncbi_taxid
,
strains
,
levels
,
outdir
,
threads
)
ncbi_taxid
,
ncbi_
strains
,
levels
,
outdir
,
threads
)
logger
.
info
(
f
"
{
nb_gen
}
{
ncbi_section
}
genome(s) downloaded
"
)
# Now that genomes are downloaded and uncompressed, check their quality to remove bad ones
...
...
@@ -314,8 +321,8 @@ def build_parser(parser):
)
general
.
add_argument
(
"
-S
"
,
dest
=
"
strains
"
,
default
=
""
,
help
=
(
"
List of strains to download.
"
"
A comma-separated list of strain names is possible
.
"
"
A
s well as a path to a filename containing one name per line.
"
"
A comma-separated list of strain names is possible
,
"
"
a
s well as a path to a filename containing one name per line.
"
"
Ex:
'
-S SB2390, IA565
'
for Klebsiella pneumoniae SB2390 and Klebsiella pneumoniae IA565 strains
"
"
Ex:
'
-S path/to/list.txt
'
path to file with strain names, one per line.
"
)
)
...
...
@@ -465,11 +472,13 @@ def check_args(parser, args):
# The user asked for neither mash-only nor quality-control-only mode, yet gave no NCBI taxID, species or strain.
# -> Give at least 1!
if
(
not
args
.
only_mash
and
not
args
.
norefseq
and
not
args
.
ncbi_species_taxid
and
not
args
.
ncbi_species_name
and
not
args
.
ncbi_taxid
and
not
args
.
strains
):
not
args
.
ncbi_species_taxid
and
not
args
.
ncbi_species_name
and
not
args
.
ncbi_taxid
and
not
args
.
strains
):
parser
.
error
(
"
As you did not put the
'
--norefseq
'
nor the
'
-M
'
option, it means that
"
"
you want to download refseq (or genbank) genomes. But you did not provide any
"
"
information, so PanACoTA cannot guess which species you want to download.
"
"
Specify NCBI_taxid (-t), and/or NCBI species taxid (-T) and/or NCBI_species (-g) to download, or add one of
"
"
you want to download refseq (or genbank) genomes. But you did not provide
"
"
any information, so PanACoTA cannot guess which species you want to
"
"
download. Specify NCBI_taxid (-t), and/or NCBI species taxid (-T)
"
"
and/or NCBI_species (-g) and/or NCBI_strain (-S) to download, or add one of
"
"
the 2 options (--norefseq or -M) if you want to skip the
'
download step
'
.
"
)
# If norefseq, give output directory
...
...
@@ -513,6 +522,11 @@ def check_args(parser, args):
"
nor an output directory (
'
-o outdir
'
).
"
"
All files will be downloaded in a folder called with the NCBI species
"
f
"
taxid
{
args
.
ncbi_species_taxid
}
instead of the species name.
"
,
"
yellow
"
))
elif
args
.
strains
:
print
(
colored
(
"
WARNING: you did not provide a species name (
'
-g species
'
option)
"
"
nor a species taxid (
'
-T spetaxid
'
) nor an output directory (
'
-o outdir
'
).
"
"
All files will be downloaded in a folder called with the specified strains
"
f
"
names
{
args
.
strains
}
instead of the species name.
"
,
"
yellow
"
))
else
:
print
(
colored
(
"
WARNING: you did not provide a species name (
'
-g species
'
option)
"
"
nor a species taxid (
'
-T spetaxid
'
) nor an output directory (
'
-o outdir
'
).
"
...
...
This diff is collapsed.
Click to expand it.
test/test_functional/test_prepare-parser.py
+
31
−
6
View file @
f5469400
...
...
@@ -23,9 +23,10 @@ def test_parser_noarg(capsys):
print
(
err
)
assert
"
error:
"
in
err
assert
(
"
As you did not put the
'
--norefseq
'
nor the
'
-M
'
option, it means that you want
"
"
to download refseq (or genbank) genomes. But you did not provide any information, so PanACoTA
"
"
cannot guess which species you want to download. Specify NCBI_taxid (-t)
"
)
in
err
assert
(
"
NCBI species taxid (-T) and/or NCBI_species (-g) to download,
"
"
to download refseq (or genbank) genomes. But you did not provide any information,
"
"
so PanACoTA cannot guess which species you want to download.
"
"
Specify NCBI_taxid (-t)
"
)
in
err
assert
(
"
NCBI species taxid (-T) and/or NCBI_species (-g) and/or NCBI_strain (-S) to download,
"
"
or add one of the 2 options (--norefseq or -M)
"
"
if you want to skip the
'
download step
'
.
"
)
in
err
...
...
@@ -274,9 +275,10 @@ def test_parse_missing_arg(capsys):
prepare
.
parse
(
parser
,
"
-p 1
"
.
split
())
_
,
err
=
capsys
.
readouterr
()
assert
(
"
As you did not put the
'
--norefseq
'
nor the
'
-M
'
option, it means that you want
"
"
to download refseq (or genbank) genomes. But you did not provide any information, so PanACoTA
"
"
cannot guess which species you want to download. Specify NCBI_taxid (-t)
"
)
in
err
assert
(
"
NCBI species taxid (-T) and/or NCBI_species (-g) to download,
"
"
to download refseq (or genbank) genomes. But you did not provide any information,
"
"
so PanACoTA cannot guess which species you want to download.
"
"
Specify NCBI_taxid (-t)
"
)
in
err
assert
(
"
NCBI species taxid (-T) and/or NCBI_species (-g) and/or NCBI_strain (-S) to download,
"
"
or add one of the 2 options (--norefseq or -M)
"
"
if you want to skip the
'
download step
'
.
"
)
in
err
...
...
@@ -352,6 +354,29 @@ def test_parser_nospecies(capsys):
"
taxid 1234 instead of the species name.
"
)
in
out
def
test_parser_nospecies_noid
(
capsys
):
"""
Test that when the user gives only strain names ('-S'), without a species name, a species
taxid or an output directory, a warning says the output folder is named after the strains.
"""
parser
=
argparse
.
ArgumentParser
(
description
=
"
Prepare
"
,
add_help
=
False
)
prepare
.
build_parser
(
parser
)
options
=
prepare
.
parse
(
parser
,
"
-S toto
"
.
split
())
assert
not
options
.
norefseq
assert
not
options
.
only_mash
assert
options
.
ncbi_species_taxid
==
""
assert
options
.
ncbi_taxid
==
""
assert
options
.
ncbi_species_name
==
""
assert
options
.
strains
==
"
toto
"
out
,
err
=
capsys
.
readouterr
()
print
(
out
)
assert
(
"
WARNING: you did not provide a species name (
'
-g species
'
option)
"
"
nor a species taxid (
'
-T spetaxid
'
)
"
"
nor an output directory (
'
-o outdir
'
).
"
)
in
out
assert
(
"
All files will be downloaded in a folder called with the specified strains
"
"
names toto instead of the species name.
"
)
in
out
def
test_parser_nospecies_nospeid
(
capsys
):
"""
Test that when the user does not give an int for the threads value, it returns an
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment