Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
f40b737d
Commit
f40b737d
authored
5 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
If infofile given, no need for list file!
parent
e4c4f89b
Branches
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#18956
passed
5 years ago
Stage: test
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PanACoTA/subcommands/annotate.py
+14
-12
14 additions, 12 deletions
PanACoTA/subcommands/annotate.py
with
14 additions
and
12 deletions
PanACoTA/subcommands/annotate.py
+
14
−
12
View file @
f40b737d
...
@@ -176,6 +176,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
...
@@ -176,6 +176,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
If True, overwrite previous results, if False keep what is already calculated
If True, overwrite previous results, if False keep what is already calculated
qc_only : bool
qc_only : bool
If True, do only quality control, if False, also do annotation
If True, do only quality control, if False, also do annotation
from_info : str
File containing information on genomes and their quality information (from prepare step)
tmp_dir : str or None
tmp_dir : str or None
Path to folder where tmp files must be saved. None to use the default tmp folder
Path to folder where tmp files must be saved. None to use the default tmp folder
res_annot_dir : str or None
res_annot_dir : str or None
...
@@ -257,7 +259,10 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
...
@@ -257,7 +259,10 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
utils
.
check_out_dirs
(
res_dir
)
utils
.
check_out_dirs
(
res_dir
)
# get only filename of list_file, without extension
# get only filename of list_file, without extension
if
list_file
:
listfile_base
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
list_file
)[
0
])
listfile_base
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
list_file
)[
0
])
else
:
listfile_base
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
from_info
)[
0
])
# Initialize logger
# Initialize logger
# set level of logger: level is the minimum level that will be considered.
# set level of logger: level is the minimum level that will be considered.
...
@@ -299,17 +304,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
...
@@ -299,17 +304,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
# orig_path is the path to the original sequence
# orig_path is the path to the original sequence
# and to_annotate_path the path to the sequence to annotate (once split etc.)
# and to_annotate_path the path to the sequence to annotate (once split etc.)
# Here, both are the same, as we take given sequences as is.
# Here, both are the same, as we take given sequences as is.
genomes
=
utils
.
read_genomes_info
(
from_info
,
name
,
date
,
db_path
,
db_path2
)
genomes
=
utils
.
read_genomes_info
(
from_info
,
name
,
date
,
logger
=
logger
)
if
not
genomes
:
if
db_path2
:
logger
.
error
((
"
We did not find any genome listed in {} in {} folder nor in {}.
"
"
Please check your list to give valid genome
"
"
names.
"
).
format
(
from_info
,
db_path
,
db_path2
))
else
:
logger
.
error
((
"
We did not find any genome listed in {} in the folder {}.
"
"
Please check your list to give valid genome
"
"
names.
"
).
format
(
from_info
,
db_path
))
sys
.
exit
(
-
1
)
# STEP 2. keep only genomes with 'good' (according to user thresholds) L90 and nb_contigs
# STEP 2. keep only genomes with 'good' (according to user thresholds) L90 and nb_contigs
# genomes = {genome: [spegenus.date, orig_seq, path_to_splitSequence, size, nbcont, l90]}
# genomes = {genome: [spegenus.date, orig_seq, path_to_splitSequence, size, nbcont, l90]}
...
@@ -318,6 +314,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
...
@@ -318,6 +314,8 @@ def main(cmd, list_file, db_path, db_path2, res_dir, name, date, l90=100, nbcont
# Get list of genomes kept (according to L90 and nbcont thresholds)
# Get list of genomes kept (according to L90 and nbcont thresholds)
kept_genomes
=
{
genome
:
info
for
genome
,
info
in
genomes
.
items
()
kept_genomes
=
{
genome
:
info
for
genome
,
info
in
genomes
.
items
()
if
info
[
-
2
]
<=
nbcont
and
info
[
-
1
]
<=
l90
}
if
info
[
-
2
]
<=
nbcont
and
info
[
-
1
]
<=
l90
}
print
(
genomes
.
keys
())
sys
.
exit
(
1
)
# Write discarded genomes to a file -> orig_name, to_annotate, gsize, nb_conts, L90
# Write discarded genomes to a file -> orig_name, to_annotate, gsize, nb_conts, L90
utils
.
write_genomes_info
(
genomes
,
list
(
kept_genomes
.
keys
()),
list_file
,
res_dir
)
utils
.
write_genomes_info
(
genomes
,
list
(
kept_genomes
.
keys
()),
list_file
,
res_dir
)
# Info on folder containing original sequences
# Info on folder containing original sequences
...
@@ -586,6 +584,10 @@ def check_args(parser, args):
...
@@ -586,6 +584,10 @@ def check_args(parser, args):
parser
.
error
(
"
If you provide a list of genomes with their calculated L90 and number of
"
parser
.
error
(
"
If you provide a list of genomes with their calculated L90 and number of
"
"
contigs, PanACoTA will use the given sequences as is. It will not cut
"
"
contigs, PanACoTA will use the given sequences as is. It will not cut
"
"
them. So, you cannot use both --cutN and --info
"
)
"
them. So, you cannot use both --cutN and --info
"
)
# Give a lst_file or an info file, not nothing
if
not
args
.
from_info
and
not
args
.
list_file
:
parser
.
error
(
"
You must provide a list of genomes to annotate. Either raw genomes
"
"
(see -l option), or genomes with quality information (see --info option).
"
)
# WARNINGS
# WARNINGS
# If user wants to cut genomes, warn him to check that it is on purpose (because default is cut at each 5'N')
# If user wants to cut genomes, warn him to check that it is on purpose (because default is cut at each 5'N')
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment