Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
ca7bb4a4
Commit
ca7bb4a4
authored
6 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
Generate .prt file from prodigal results
parent
ebe1fd80
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
PanACoTA/annotate_module/format_prodigal.py
+18
-9
18 additions, 9 deletions
PanACoTA/annotate_module/format_prodigal.py
PanACoTA/subcommands/annotate.py
+4
-2
4 additions, 2 deletions
PanACoTA/subcommands/annotate.py
PanACoTA/utils.py
+4
-3
4 additions, 3 deletions
PanACoTA/utils.py
with
26 additions
and
14 deletions
PanACoTA/annotate_module/format_prodigal.py
+
18
−
9
View file @
ca7bb4a4
...
...
@@ -118,6 +118,7 @@ def create_gene_lst(gen_file, res_gene_file, res_lst_file, name, logger):
prev_start
=
""
prev_end
=
""
prev_strand
=
""
prev_annot
=
""
# Update loc when contig changes
prev_loc
=
"
b
"
loc
=
"
b
"
...
...
@@ -125,6 +126,7 @@ def create_gene_lst(gen_file, res_gene_file, res_lst_file, name, logger):
# Open files: ffn prodigal to read, .gen and .lst gembase to create
with
open
(
gen_file
,
"
r
"
)
as
ffn
,
open
(
res_gene_file
,
"
w
"
)
as
r_gen
,
\
open
(
res_lst_file
,
"
w
"
)
as
r_lst
:
# Read all lines in ffn file (sequences in nuc. for each gene)
for
lineffn
in
ffn
:
# If it is a sequence, save it and go to next line
if
not
lineffn
.
startswith
(
"
>
"
):
...
...
@@ -156,6 +158,7 @@ def create_gene_lst(gen_file, res_gene_file, res_lst_file, name, logger):
prev_start
=
start
prev_end
=
end
prev_cont_num
=
contig_num
prev_annot
=
annot
if
int
(
strand
)
==
1
:
strand
=
"
D
"
else
:
...
...
@@ -165,7 +168,8 @@ def create_gene_lst(gen_file, res_gene_file, res_lst_file, name, logger):
# Write line in lstinfo, + header and sequence to the gene file
_
,
lstline
=
gfunc
.
write_gene
(
"
CDS
"
,
locus_num
,
"
NA
"
,
"
NA
"
,
0
,
prev_loc
,
name
,
prev_cont_num
,
"
NA
"
,
"
NA
"
,
prev_strand
,
prev_start
,
prev_end
,
r_lst
)
prev_loc
,
name
,
prev_cont_num
,
"
NA
"
,
prev_annot
,
prev_strand
,
prev_start
,
prev_end
,
r_lst
)
gfunc
.
write_header
(
lstline
,
r_gen
)
r_gen
.
write
(
seq
)
...
...
@@ -183,10 +187,12 @@ def create_gene_lst(gen_file, res_gene_file, res_lst_file, name, logger):
prev_end
=
end
prev_strand
=
strand
prev_loc
=
loc
prev_annot
=
annot
# Write last sequence (-> loc = 'b')
prev_loc
=
"
b
"
_
,
lstline
=
gfunc
.
write_gene
(
"
CDS
"
,
locus_num
,
"
NA
"
,
"
NA
"
,
0
,
prev_loc
,
name
,
prev_cont_num
,
"
NA
"
,
"
NA
"
,
prev_strand
,
prev_start
,
prev_end
,
r_lst
)
prev_loc
,
name
,
prev_cont_num
,
"
NA
"
,
prev_annot
,
prev_strand
,
prev_start
,
prev_end
,
r_lst
)
gfunc
.
write_header
(
lstline
,
r_gen
)
r_gen
.
write
(
seq
)
return
True
...
...
@@ -213,10 +219,11 @@ def create_prt(prot_file, res_prot_file, res_lst_file, logger):
True if conversion went well, False otherwise
"""
# Open
# + prot file to read gene sequences from prodigal results
# + res_prot file to write sequences with with gembase headers
# + res_lst_file to get genes gembase names
# Open:
# - prot file to read gene sequences from prodigal results
# - res_prot file to write sequences with gembase headers
# - res_lst_file to get genes gembase names and other infos (strand, size...)
with
open
(
prot_file
,
"
r
"
)
as
faa
,
open
(
res_prot_file
,
"
w
"
)
as
r_prt
,
\
open
(
res_lst_file
,
"
r
"
)
as
r_lst
:
for
lineprot
in
faa
:
...
...
@@ -227,10 +234,12 @@ def create_prt(prot_file, res_prot_file, res_lst_file, logger):
# If header, replace by gembase header
linelst
=
r_lst
.
readline
()
# gembase name is in the fifth column of lst file
start
,
end
,
_
,
_
,
gem_name
,
product
,
info
=
linelst
.
split
(
"
\t
"
)[
4
]
start
,
end
,
_
,
_
,
gem_name
,
product
,
info
=
linelst
.
strip
().
split
(
"
\t
"
)
# Write this gembase name as a new header
prt_header
=
"
\t
"
.
join
([
gem_name
,
str
(
int
(
end
)
-
int
(
start
)),
product
,
info
])
r_prt
.
write
(
"
>
"
+
prt_header
+
"
\n
"
)
size
=
int
(
end
)
-
int
(
start
)
+
1
towrite
=
"
\t
"
.
join
([
gem_name
,
str
(
size
),
product
,
info
])
r_prt
.
write
(
"
>
"
+
towrite
+
"
\n
"
)
This diff is collapsed.
Click to expand it.
PanACoTA/subcommands/annotate.py
+
4
−
2
View file @
ca7bb4a4
...
...
@@ -303,9 +303,11 @@ def main(cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn
skipped_format
=
ffunc
.
format_genomes
(
genomes
,
results_ok
,
res_dir
,
res_annot_dir
,
prodigal_only
,
threads
,
quiet
=
quiet
)
if
skipped
:
utils
.
write_warning_skipped
(
skipped
,
prodigal_only
=
prodigal_only
)
utils
.
write_warning_skipped
(
skipped
,
prodigal_only
=
prodigal_only
,
logfile
=
logfile_base
+
"
.log
"
)
if
skipped_format
:
utils
.
write_warning_skipped
(
skipped_format
,
do_format
=
True
,
prodigal_only
=
prodigal_only
)
utils
.
write_warning_skipped
(
skipped_format
,
do_format
=
True
,
prodigal_only
=
prodigal_only
,
logfile
=
logfile_base
+
"
.log
"
)
logger
.
info
(
"
Annotation step done.
"
)
return
genomes
,
kept_genomes
,
skipped
,
skipped_format
...
...
This diff is collapsed.
Click to expand it.
PanACoTA/utils.py
+
4
−
3
View file @
ca7bb4a4
...
...
@@ -345,7 +345,7 @@ def plot_distr(values, limit, title, text):
return
fig
def
write_warning_skipped
(
skipped
,
do_format
=
False
,
prodigal_only
=
False
):
def
write_warning_skipped
(
skipped
,
do_format
=
False
,
prodigal_only
=
False
,
logfile
=
""
):
"""
At the end of the script, write a warning to the user with the names of the genomes
which had problems with prokka.
...
...
@@ -379,9 +379,10 @@ def write_warning_skipped(skipped, do_format=False, prodigal_only=False):
"
Here are the genomes (problem with {1} or no
"
"
gene found):
\n
{0}
"
).
format
(
soft
,
list_to_write
))
else
:
logger
.
info
(
"
WARNING: Some genomes could not be formatted. See {}
"
.
format
(
logfile
))
logger
.
warning
((
"
Some genomes were annotated by {}, but could not be formatted,
"
"
and are hence absent from your output database. Please look at
log
"
"
files to get more information about why they could not be
"
"
and are hence absent from your output database. Please look at
"
"
log.details
files to get more information about why they could not be
"
"
formatted.
\n
{}
"
).
format
(
soft
,
list_to_write
))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment