Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
c00044d0
Commit
c00044d0
authored
5 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
Add path to sequence annotated
parent
5b411ee1
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PanACoTA/subcommands/annotate.py
+33
-9
33 additions, 9 deletions
PanACoTA/subcommands/annotate.py
with
33 additions
and
9 deletions
PanACoTA/subcommands/annotate.py
+
33
−
9
View file @
c00044d0
...
...
@@ -101,6 +101,7 @@ April 2017
import
os
import
sys
from
termcolor
import
colored
import
sys
def
main_from_parse
(
arguments
):
...
...
@@ -279,16 +280,19 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
"
names.
"
).
format
(
list_file
,
db_path
))
sys
.
exit
(
-
1
)
# Get L90, nbcontig, size for all genomes, and cut at stretches of 'N' if asked
# -> genome: [spegenus.date, path, size, nbcont, l90]
# -> genome: [spegenus.date,
to_annotate_
path, size, nbcont, l90]
gfunc
.
analyse_all_genomes
(
genomes
,
db_path
,
tmp_dir
,
cutn
,
prodigal_only
,
logger
,
quiet
=
quiet
)
# --info <filename> option given: read information (L90, nb contigs...) from this file.
else
:
# genomes = {genome: [spegenus.date, path_to_splitSequence, size, nbcont, l90]}
# genomes = genome: [spegenus.date, orig_path, to_annotate_path, size, nbcont, l90]
# orig_path is the path to the original sequence
# and to_annotate_path the path to the sequence to annotate (once split etc.)
# Here, both are the same, as we take given sequences as is.
genomes
=
utils
.
read_genomes_info
(
from_info
,
name
,
date
,
db_path
,
tmp_dir
)
# STEP 2. keep only genomes with 'good' (according to user thresholds) L90 and nb_contigs
# genomes = {genome: [spegenus.date, path_to_splitSequence, size, nbcont, l90]}
# genomes = {genome: [spegenus.date,
orig_seq,
path_to_splitSequence, size, nbcont, l90]}
# Plot L90 and nb_contigs distributions
gfunc
.
plot_distributions
(
genomes
,
res_dir
,
listfile_base
,
l90
,
nbcont
)
# Get list of genomes kept (according to L90 and nbcont thresholds)
...
...
@@ -300,16 +304,18 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
if
qc_only
:
utils
.
write_discarded
(
genomes
,
[],
list_file
,
res_dir
,
qc
=
True
)
logger
.
info
(
"
QC only done.
"
)
return
genomes
,
kept_genomes
return
# STEP 3. Rename genomes kept, ordered by decreasing quality
gfunc
.
rename_all_genomes
(
kept_genomes
)
# kept_genomes = {genome: [gembase_name, path_split_gembase, gsize, nbcont, L90]}
# kept_genomes = {genome: [gembase_name, path_to_origfile, path_split_gembase,
# gsize, nbcont, L90]}
# Write lstinfo file (list of genomes kept with info on L90 etc.)
logger
.
info
(
"
-> Original sequences folder: {}
"
.
format
(
db_path
))
logger
.
info
(
"
-> Folder with sequences to annotate: {}
"
.
format
(
tmp_dir
))
utils
.
write_lstinfo
(
list_file
,
kept_genomes
,
res_dir
)
# STEP 4. Annotate all kept genomes
results
=
pfunc
.
run_annotation_all
(
kept_genomes
,
threads
,
force
,
res_annot_dir
,
prodigal_only
,
small
,
quiet
=
quiet
)
import
sys
sys
.
exit
(
1
)
# List of genomes to format
results_ok
=
[
genome
for
(
genome
,
ok
)
in
results
.
items
()
if
ok
]
...
...
@@ -324,12 +330,13 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
if
skipped
:
utils
.
write_warning_skipped
(
skipped
,
prodigal_only
=
prodigal_only
,
logfile
=
logfile_base
)
# Initialize list of genomes skipped because something went wrong while formatting.
skipped_format
=
[]
# STEP 5. Format genomes annotated
# Here, we have at least 1 genome annotated (otherwise,
# it would already have stopped because results_ok is empty)
# Initialize list of genomes skipped because something went wrong while formatting.
skipped_format
=
[]
# Generate database (folders Proteins, Genes, Replicons, LSTINFO)
skipped_format
=
ffunc
.
format_genomes
(
genomes
,
results_ok
,
res_dir
,
res_annot_dir
,
prodigal_only
,
threads
,
quiet
=
quiet
)
...
...
@@ -350,8 +357,23 @@ def build_parser(parser):
The parser to configure
"""
import
argparse
from
PanACoTA
import
utils
from
textwrap
import
dedent
header
=
'''
___ _____ ___ _____ _____
( _`\ ( _ )( _`\ (_ _)( _ )
| |_) ) _ _ ___ | (_) || ( (_) _ | | | (_) |
| ,__/
'
/
'
_` )/
'
_ `\| _ || | _ /
'
_`\ | | | _ |
| | ( (_| || ( ) || | | || (_( )( (_) )| | | | | |
(_) `\__,_)(_) (_)(_) (_)(____/
'
`\___/
'
(_) (_) (_)
Large scale comparative genomics tools
-------------------------------------------
'''
print
(
dedent
(
header
))
def
gen_name
(
param
):
if
not
utils
.
check_format
(
param
):
...
...
@@ -498,6 +520,8 @@ def parse(parser, argu):
argparse.Namespace
Parsed arguments
"""
import
argparse
args
=
parser
.
parse_args
(
argu
)
return
check_args
(
parser
,
args
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment