Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
panacota
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Amandine PERRIN
panacota
Commits
66d300bc
Commit
66d300bc
authored
6 years ago
by
Amandine PERRIN
Browse files
Options
Downloads
Patches
Plain Diff
Sequences are now correctly formatted (for prokka cut and no cut, and for prodigal cut)
parent
1d87b4eb
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PanACoTA/annotate_module/genome_seq_functions.py
+13
-14
13 additions, 14 deletions
PanACoTA/annotate_module/genome_seq_functions.py
with
13 additions
and
14 deletions
PanACoTA/annotate_module/genome_seq_functions.py
+
13
−
14
View file @
66d300bc
...
@@ -86,8 +86,6 @@ def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, prodigal_only, logger, q
...
@@ -86,8 +86,6 @@ def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, prodigal_only, logger, q
del
genomes
[
gen
]
del
genomes
[
gen
]
if
not
quiet
:
if
not
quiet
:
bar
.
finish
()
bar
.
finish
()
import
sys
sys
.
exit
(
0
)
def
analyse_genome
(
genome
,
dbpath
,
tmp_path
,
cut
,
pat
,
genomes
,
prodigal_only
,
logger
):
def
analyse_genome
(
genome
,
dbpath
,
tmp_path
,
cut
,
pat
,
genomes
,
prodigal_only
,
logger
):
...
@@ -126,8 +124,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
...
@@ -126,8 +124,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
bool
bool
True if genome analysis went well, False otherwise
True if genome analysis went well, False otherwise
"""
"""
print
(
"
***** GENOME genome analyzed:
"
+
genome
)
gpath
,
grespath
=
get_output_dir
(
prodigal_only
,
dbpath
,
tmp_path
,
genome
,
cut
,
pat
)
gpath
,
grespath
=
get_output_dir
(
prodigal_only
,
dbpath
,
tmp_path
,
genome
,
cut
,
pat
)
# Open original sequence file
# Open original sequence file
...
@@ -145,7 +141,7 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
...
@@ -145,7 +141,7 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# Read each line of original sequence
# Read each line of original sequence
for
line
in
genf
:
for
line
in
genf
:
#### NEW CONTIG
#### NEW CONTIG
#
If l
ine corresponding to a new contig
#
L
ine corresponding to a new contig
if
line
.
startswith
(
"
>
"
):
if
line
.
startswith
(
"
>
"
):
# If not first contig, write info to output file (of needed)
# If not first contig, write info to output file (of needed)
if
cur_seq
!=
""
:
if
cur_seq
!=
""
:
...
@@ -161,11 +157,13 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
...
@@ -161,11 +157,13 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# If prokka, contig name is 1st word, 1st 15 characters
# If prokka, contig name is 1st word, 1st 15 characters
else
:
else
:
cur_contig_name
=
line
.
split
()[
0
][:
15
]
cur_contig_name
=
line
.
split
()[
0
][:
15
]
# Initialize for next contig
cur_seq
=
""
cur_seq
=
""
# #### SEQUENCE LINE
# #### SEQUENCE LINE
# If sequence line, keep it, all in upper case
# If sequence line, keep it, all in upper case
else
:
else
:
cur_seq
+=
line
.
strip
().
upper
()
+
"
\n
"
# Add this line without \n to sequence (1 sequence per line)
cur_seq
+=
line
.
strip
().
upper
()
# LAST CONTIG
# LAST CONTIG
if
cur_contig_name
!=
""
:
if
cur_contig_name
!=
""
:
...
@@ -189,7 +187,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
...
@@ -189,7 +187,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# If we wrote a new sequence file, close it
# If we wrote a new sequence file, close it
if
grespath
:
if
grespath
:
gresf
.
close
()
gresf
.
close
()
print
(
genomes
)
return
True
return
True
...
@@ -277,16 +274,18 @@ def format_contig(cut, pat, cur_seq, cur_contig_name, contig_sizes, gresf, num,
...
@@ -277,16 +274,18 @@ def format_contig(cut, pat, cur_seq, cur_contig_name, contig_sizes, gresf, num,
bool
bool
True if contig has been written without problem, False if any problem
True if contig has been written without problem, False if any problem
"""
"""
# "CUT" if cut: need to cut at each 'pat' -> write new header + seq in new file
# "CUT" if cut: need to cut at each 'pat' -> write new header + seq in new file
if
cut
:
if
cut
:
# Cut sequence and write header + sequence to res file
# Cut sequence and write header + sequence to res file
num
=
split_contig
(
pat
,
cur_seq
,
cur_contig_name
,
contig_sizes
,
gresf
,
num
)
num
=
split_contig
(
pat
,
cur_seq
,
cur_contig_name
,
contig_sizes
,
gresf
,
num
)
# PROKKA User does not want to cut, but will annotate with prokka
# PROKKA User does not want to cut, but will annotate with prokka, so we still
# have to create a new sequence file
elif
gresf
:
elif
gresf
:
gresf
.
write
(
"
{}_{}
\n
"
.
format
(
cur_contig_name
,
num
))
new_contig_name
=
"
{}_{}
\n
"
.
format
(
cur_contig_name
,
num
)
gresf
.
write
(
cur_seq
)
gresf
.
write
(
new_contig_name
)
contig_sizes
[
cur_contig_name
]
=
len
(
cur_seq
)
gresf
.
write
(
cur_seq
+
"
\n
"
)
contig_sizes
[
new_contig_name
]
=
len
(
cur_seq
)
num
+=
1
# PRODIGAL No cut, and prodigal used -> no new file created, but check
# PRODIGAL No cut, and prodigal used -> no new file created, but check
# contig unique names
# contig unique names
else
:
else
:
...
@@ -333,9 +332,9 @@ def split_contig(pat, whole_seq, cur_contig_name, contig_sizes, gresf, num):
...
@@ -333,9 +332,9 @@ def split_contig(pat, whole_seq, cur_contig_name, contig_sizes, gresf, num):
# we get empty contigs, if 2 occurrences of the pattern are side by side).
# we get empty contigs, if 2 occurrences of the pattern are side by side).
if
len
(
seq
)
==
0
:
if
len
(
seq
)
==
0
:
continue
continue
cur_name
=
cur_contig_name
+
"
_
"
+
str
(
num
)
new_contig_name
=
"
{}_{}
\n
"
.
format
(
cur_contig_name
,
num
)
contig_sizes
[
cur_name
]
=
len
(
seq
)
contig_sizes
[
cur_name
]
=
len
(
seq
)
gresf
.
write
(
cur_name
+
"
\n
"
)
gresf
.
write
(
new_contig_name
)
gresf
.
write
(
seq
+
"
\n
"
)
gresf
.
write
(
seq
+
"
\n
"
)
num
+=
1
num
+=
1
return
num
return
num
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment