Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
capsuledb
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
gem
capsuledb
Commits
91e60a1c
Commit
91e60a1c
authored
Jan 19, 2016
by
Bertrand NÉRON
Browse files
Options
Downloads
Patches
Plain Diff
fix a lot of small bugs
use argparse instead of optparse; raise an error when system-id is not specified
parent
7110fefd
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/parser/parser.py
+43
-40
43 additions, 40 deletions
src/parser/parser.py
with
43 additions
and
40 deletions
src/parser/parser.py
+
43
−
40
View file @
91e60a1c
...
@@ -26,9 +26,11 @@ def replicon_parser(replicon_data):
...
@@ -26,9 +26,11 @@ def replicon_parser(replicon_data):
:rtype: dict
:rtype: dict
"""
"""
replicon_db
=
{}
replicon_db
=
{}
Replicon_info
=
namedtuple
(
'
Replicon_info
'
,
(
'
name
'
,
'
taxid
'
,
'
strain
'
,
'
taxonomy
'
,
'
type
'
))
Replicon_info
=
namedtuple
(
'
Replicon_info
'
,
(
'
name
'
,
'
ncbi_id
'
,
'
taxid
'
,
'
strain
'
,
'
taxonomy
'
,
'
type
'
))
with
open
(
replicon_data
,
'
r
'
)
as
replicon_file
:
with
open
(
replicon_data
,
'
r
'
)
as
replicon_file
:
line_nb
=
0
for
line
in
replicon_file
:
for
line
in
replicon_file
:
line_nb
+=
1
if
not
line
.
startswith
(
'
#
'
):
if
not
line
.
startswith
(
'
#
'
):
line
=
line
.
strip
()
line
=
line
.
strip
()
fields
=
line
.
split
(
'
\t
'
)
fields
=
line
.
split
(
'
\t
'
)
...
@@ -43,12 +45,13 @@ def replicon_parser(replicon_data):
...
@@ -43,12 +45,13 @@ def replicon_parser(replicon_data):
taxonomy
=
fields
[
4
].
split
(
'
;
'
)
taxonomy
=
fields
[
4
].
split
(
'
;
'
)
# remove ending dot or semi-colon from the last term of taxonnomy
# remove ending dot or semi-colon from the last term of taxonnomy
if
taxonomy
[
-
1
].
endswith
(
'
.
'
)
or
taxonomy
[
-
1
].
endswith
(
'
;
'
):
if
taxonomy
[
-
1
].
endswith
(
'
.
'
)
or
taxonomy
[
-
1
].
endswith
(
'
;
'
):
taxonomy
[
-
1
]
=
taxonomy
[
-
1
][:
-
1
]
taxonomy
=
taxonomy
[
-
1
][:
-
1
]
replicon_type
=
fields
[
5
]
replicon_type
=
fields
[
5
]
replicon_db
[
replicon_id
]
=
Replicon_info
(
replicon_id
,
ncbi_id
,
taxid
,
strain
,
replicon_db
[
replicon_id
]
=
Replicon_info
(
replicon_id
,
ncbi_id
,
taxid
,
strain
,
taxonomy
,
replicon_type
)
taxonomy
,
replicon_type
)
except
Exception
as
err
:
except
Exception
as
err
:
raise
Exception
(
"
Error during parsing line : {
0
} : {
1
}
"
.
format
(
line
,
err
))
raise
Exception
(
"
Error during parsing line
{0}
: {
1
} : {
2
}
"
.
format
(
line_nb
,
line
,
err
))
return
replicon_db
return
replicon_db
def
system_parser
(
system_data
):
def
system_parser
(
system_data
):
...
@@ -67,13 +70,16 @@ def system_parser(system_data):
...
@@ -67,13 +70,16 @@ def system_parser(system_data):
)
)
with
open
(
system_data
,
'
r
'
)
as
system_file
:
with
open
(
system_data
,
'
r
'
)
as
system_file
:
line_nb
=
0
for
line
in
system_file
:
for
line
in
system_file
:
line_nb
+=
1
if
line
[
0
]
!=
'
#
'
:
if
line
[
0
]
!=
'
#
'
:
line
=
line
.
strip
()
line
=
line
.
strip
()
fields
=
line
.
split
(
'
\t
'
)
fields
=
line
.
split
(
'
\t
'
)
gene_code
=
fields
[
0
]
gene_code
=
fields
[
0
]
if
gene_code
in
system_db
:
if
gene_code
in
system_db
:
raise
KeyError
(
"
duplicate replicon:
"
+
fields
[
0
])
raise
KeyError
(
"
duplicate replicon:
"
+
fields
[
0
])
try
:
gene_id
=
fields
[
1
]
gene_id
=
fields
[
1
]
protein_length
=
int
(
fields
[
2
])
protein_length
=
int
(
fields
[
2
])
strand
=
fields
[
3
]
if
fields
[
3
]
!=
'
-
'
else
None
strand
=
fields
[
3
]
if
fields
[
3
]
!=
'
-
'
else
None
...
@@ -88,10 +94,14 @@ def system_parser(system_data):
...
@@ -88,10 +94,14 @@ def system_parser(system_data):
match_end
=
int
(
fields
[
11
])
if
fields
[
11
]
!=
'
-
'
else
None
match_end
=
int
(
fields
[
11
])
if
fields
[
11
]
!=
'
-
'
else
None
replicon_id
=
fields
[
12
]
replicon_id
=
fields
[
12
]
predicted_system
=
fields
[
13
]
if
fields
[
13
]
!=
'
-
'
else
None
predicted_system
=
fields
[
13
]
if
fields
[
13
]
!=
'
-
'
else
None
system_id
=
fields
[
14
]
if
fields
[
14
]
!=
'
-
'
else
None
system_id
=
fields
[
14
]
if
system_id
==
'
-
'
:
raise
RuntimeError
(
"
System-Id is empty
"
)
system_status
=
fields
[
15
]
if
fields
[
15
]
!=
'
-
'
else
None
system_status
=
fields
[
15
]
if
fields
[
15
]
!=
'
-
'
else
None
gene_name
=
fields
[
16
]
if
fields
[
16
]
else
None
gene_name
=
fields
[
16
]
if
fields
[
16
]
else
None
description
=
fields
[
17
]
if
fields
[
17
]
else
None
description
=
fields
[
17
]
if
fields
[
17
]
else
None
except
Exception
as
err
:
raise
RuntimeError
(
"
Error during parsing line {0}: {1} : {2}
"
.
format
(
line_nb
,
line
,
err
))
gene
=
Gene
(
gene_code
,
gene
=
Gene
(
gene_code
,
gene_id
,
gene_id
,
protein_length
,
protein_length
,
...
@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd
...
@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd
secretion_system
.
genes
=
genes
secretion_system
.
genes
=
genes
secreton_db
.
save_doc
(
secretion_system
,
force_update
=
force_update
)
secreton_db
.
save_doc
(
secretion_system
,
force_update
=
force_update
)
if
__name__
==
'
__main__
'
:
if
__name__
==
'
__main__
'
:
import
argparse
import
argparse
import
sys
import
sys
...
@@ -194,7 +203,7 @@ if __name__ == '__main__':
...
@@ -194,7 +203,7 @@ if __name__ == '__main__':
return
user
,
password
return
user
,
password
usage
=
"""
usage
=
"""
%prog [options]
%
(
prog
)s
[options]
parse a file containing replicon informations and a file containing system informations
parse a file containing replicon informations and a file containing system informations
and fill a couchDB data base with these informations
and fill a couchDB data base with these informations
"""
"""
...
@@ -202,23 +211,19 @@ if __name__ == '__main__':
...
@@ -202,23 +211,19 @@ if __name__ == '__main__':
server_opt
=
parser
.
add_argument_group
(
title
=
"
Server Options
"
)
server_opt
=
parser
.
add_argument_group
(
title
=
"
Server Options
"
)
server_opt
.
add_argument
(
"
-S
"
,
"
--server
"
,
server_opt
.
add_argument
(
"
-S
"
,
"
--server
"
,
action
=
"
store
"
,
action
=
"
store
"
,
type
=
"
string
"
,
dest
=
"
server_url
"
,
dest
=
"
server_url
"
,
help
=
"
the url of the couchDB server (with the port)
"
)
help
=
"
the url of the couchDB server (with the port)
"
)
server_opt
.
add_argument
(
"
-d
"
,
"
--database
"
,
server_opt
.
add_argument
(
"
-d
"
,
"
--database
"
,
action
=
"
store
"
,
action
=
"
store
"
,
type
=
"
string
"
,
dest
=
"
db_name
"
,
dest
=
"
db_name
"
,
help
=
"
the name of the data base
"
)
help
=
"
the name of the data base
"
)
parsing_opt
=
parser
.
add_argument_group
(
title
=
"
Parsing Options
"
)
parsing_opt
=
parser
.
add_argument_group
(
title
=
"
Parsing Options
"
)
parsing_opt
.
add_argument
(
"
-r
"
,
"
--replicon
"
,
parsing_opt
.
add_argument
(
"
-r
"
,
"
--replicon
"
,
action
=
"
store
"
,
action
=
"
store
"
,
type
=
"
string
"
,
dest
=
"
replicon_path
"
,
dest
=
"
replicon_path
"
,
help
=
"
the path to the replicon file to parse
"
)
help
=
"
the path to the replicon file to parse
"
)
parsing_opt
.
add_argument
(
"
-s
"
,
"
--system
"
,
parsing_opt
.
add_argument
(
"
-s
"
,
"
--system
"
,
action
=
"
store
"
,
action
=
"
store
"
,
type
=
"
string
"
,
dest
=
"
system_path
"
,
dest
=
"
system_path
"
,
help
=
"
the path to the system secretion file to parse
"
)
help
=
"
the path to the system secretion file to parse
"
)
parsing_opt
.
add_argument
(
"
-f
"
,
"
--force_update
"
,
parsing_opt
.
add_argument
(
"
-f
"
,
"
--force_update
"
,
...
@@ -226,43 +231,41 @@ if __name__ == '__main__':
...
@@ -226,43 +231,41 @@ if __name__ == '__main__':
dest
=
"
force_update
"
,
dest
=
"
force_update
"
,
default
=
False
,
default
=
False
,
help
=
"
insert document even if there is already a document with the same id (replace it)
"
)
help
=
"
insert document even if there is already a document with the same id (replace it)
"
)
options
,
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
not
option
s
.
server_url
:
if
not
arg
s
.
server_url
:
print
(
"
You must specify a server url
"
,
file
=
sys
.
stderr
)
print
(
"
You must specify a server url
"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
db_name
:
if
not
arg
s
.
db_name
:
print
(
"
You must specify a data base name
"
,
file
=
sys
.
stderr
)
print
(
"
You must specify a data base name
"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
replicon_path
:
if
not
arg
s
.
replicon_path
:
print
(
"
You must specify the path to the replicon information file
"
,
file
=
sys
.
stderr
)
print
(
"
You must specify the path to the replicon information file
"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
system_path
:
if
not
arg
s
.
system_path
:
print
(
"
You must specify the path to the secretion system information file
"
,
file
=
sys
.
stderr
)
print
(
"
You must specify the path to the secretion system information file
"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
replicon_db
=
replicon_parser
(
option
s
.
replicon_path
)
replicon_db
=
replicon_parser
(
arg
s
.
replicon_path
)
system_db
=
system_parser
(
option
s
.
system_path
)
system_db
=
system_parser
(
arg
s
.
system_path
)
try_again
=
0
try_again
=
0
while
True
:
while
True
:
user
,
password
=
get_credentials
()
user
,
password
=
get_credentials
()
try
:
try
:
fill_db
(
option
s
.
server_url
,
option
s
.
db_name
,
user
,
password
,
fill_db
(
arg
s
.
server_url
,
arg
s
.
db_name
,
user
,
password
,
replicon_db
,
system_db
,
force_update
=
option
s
.
force_update
)
replicon_db
,
system_db
,
force_update
=
arg
s
.
force_update
)
break
break
except
restkit
.
errors
.
Unauthorized
as
err
:
except
restkit
.
errors
.
Unauthorized
as
err
:
print
(
"
Bad authentication, try again
"
,
file
=
sys
.
stderr
)
try_again
+=
1
try_again
+=
1
if
try_again
>
2
:
if
try_again
>
2
:
sys
.
exit
(
"
Authentication failure
"
)
sys
.
exit
(
"
Authentication failure
"
)
except
Exception
,
err
:
except
Exception
as
err
:
print
(
err
,
file
=
sys
.
stderr
)
sys
.
exit
(
2
)
sys
.
exit
(
2
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment