Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gem
capsuledb
Commits
91e60a1c
Commit
91e60a1c
authored
Jan 19, 2016
by
Bertrand NÉRON
Browse files
Fix a lot of small bugs
Use argparse instead of optparse; raise an error when system-id is not specified
parent
7110fefd
Changes
1
Show whitespace changes
Inline
Side-by-side
src/parser/parser.py
View file @
91e60a1c
...
@@ -26,9 +26,11 @@ def replicon_parser(replicon_data):
...
@@ -26,9 +26,11 @@ def replicon_parser(replicon_data):
:rtype: dict
:rtype: dict
"""
"""
replicon_db
=
{}
replicon_db
=
{}
Replicon_info
=
namedtuple
(
'Replicon_info'
,
(
'name'
,
'taxid'
,
'strain'
,
'taxonomy'
,
'type'
))
Replicon_info
=
namedtuple
(
'Replicon_info'
,
(
'name'
,
'ncbi_id'
,
'taxid'
,
'strain'
,
'taxonomy'
,
'type'
))
with
open
(
replicon_data
,
'r'
)
as
replicon_file
:
with
open
(
replicon_data
,
'r'
)
as
replicon_file
:
line_nb
=
0
for
line
in
replicon_file
:
for
line
in
replicon_file
:
line_nb
+=
1
if
not
line
.
startswith
(
'#'
):
if
not
line
.
startswith
(
'#'
):
line
=
line
.
strip
()
line
=
line
.
strip
()
fields
=
line
.
split
(
'
\t
'
)
fields
=
line
.
split
(
'
\t
'
)
...
@@ -43,12 +45,13 @@ def replicon_parser(replicon_data):
...
@@ -43,12 +45,13 @@ def replicon_parser(replicon_data):
taxonomy
=
fields
[
4
].
split
(
'; '
)
taxonomy
=
fields
[
4
].
split
(
'; '
)
# remove ending dot or semi-colon from the last term of taxonomy
# remove ending dot or semi-colon from the last term of taxonomy
if
taxonomy
[
-
1
].
endswith
(
'.'
)
or
taxonomy
[
-
1
].
endswith
(
';'
):
if
taxonomy
[
-
1
].
endswith
(
'.'
)
or
taxonomy
[
-
1
].
endswith
(
';'
):
taxonomy
[
-
1
]
=
taxonomy
[
-
1
][:
-
1
]
taxonomy
=
taxonomy
[
-
1
][:
-
1
]
replicon_type
=
fields
[
5
]
replicon_type
=
fields
[
5
]
replicon_db
[
replicon_id
]
=
Replicon_info
(
replicon_id
,
ncbi_id
,
taxid
,
strain
,
replicon_db
[
replicon_id
]
=
Replicon_info
(
replicon_id
,
ncbi_id
,
taxid
,
strain
,
taxonomy
,
replicon_type
)
taxonomy
,
replicon_type
)
except
Exception
as
err
:
except
Exception
as
err
:
raise
Exception
(
"Error during parsing line : {
0
} : {
1
}"
.
format
(
line
,
err
))
raise
Exception
(
"Error during parsing line
{0}
: {
1
} : {
2
}"
.
format
(
line_nb
,
line
,
err
))
return
replicon_db
return
replicon_db
def
system_parser
(
system_data
):
def
system_parser
(
system_data
):
...
@@ -66,14 +69,17 @@ def system_parser(system_data):
...
@@ -66,14 +69,17 @@ def system_parser(system_data):
'score'
,
'i_evalue'
,
'coverage'
,
'match_begin'
,
'match_end'
,
'name'
,
'description'
)
'score'
,
'i_evalue'
,
'coverage'
,
'match_begin'
,
'match_end'
,
'name'
,
'description'
)
)
)
with
open
(
system_data
,
'r'
)
as
system_file
:
with
open
(
system_data
,
'r'
)
as
system_file
:
line_nb
=
0
for
line
in
system_file
:
for
line
in
system_file
:
line_nb
+=
1
if
line
[
0
]
!=
'#'
:
if
line
[
0
]
!=
'#'
:
line
=
line
.
strip
()
line
=
line
.
strip
()
fields
=
line
.
split
(
'
\t
'
)
fields
=
line
.
split
(
'
\t
'
)
gene_code
=
fields
[
0
]
gene_code
=
fields
[
0
]
if
gene_code
in
system_db
:
if
gene_code
in
system_db
:
raise
KeyError
(
"duplicate replicon:"
+
fields
[
0
])
raise
KeyError
(
"duplicate replicon:"
+
fields
[
0
])
try
:
gene_id
=
fields
[
1
]
gene_id
=
fields
[
1
]
protein_length
=
int
(
fields
[
2
])
protein_length
=
int
(
fields
[
2
])
strand
=
fields
[
3
]
if
fields
[
3
]
!=
'-'
else
None
strand
=
fields
[
3
]
if
fields
[
3
]
!=
'-'
else
None
...
@@ -88,10 +94,14 @@ def system_parser(system_data):
...
@@ -88,10 +94,14 @@ def system_parser(system_data):
match_end
=
int
(
fields
[
11
])
if
fields
[
11
]
!=
'-'
else
None
match_end
=
int
(
fields
[
11
])
if
fields
[
11
]
!=
'-'
else
None
replicon_id
=
fields
[
12
]
replicon_id
=
fields
[
12
]
predicted_system
=
fields
[
13
]
if
fields
[
13
]
!=
'-'
else
None
predicted_system
=
fields
[
13
]
if
fields
[
13
]
!=
'-'
else
None
system_id
=
fields
[
14
]
if
fields
[
14
]
!=
'-'
else
None
system_id
=
fields
[
14
]
if
system_id
==
'-'
:
raise
RuntimeError
(
"System-Id is empty"
)
system_status
=
fields
[
15
]
if
fields
[
15
]
!=
'-'
else
None
system_status
=
fields
[
15
]
if
fields
[
15
]
!=
'-'
else
None
gene_name
=
fields
[
16
]
if
fields
[
16
]
else
None
gene_name
=
fields
[
16
]
if
fields
[
16
]
else
None
description
=
fields
[
17
]
if
fields
[
17
]
else
None
description
=
fields
[
17
]
if
fields
[
17
]
else
None
except
Exception
as
err
:
raise
RuntimeError
(
"Error during parsing line {0}: {1} : {2}"
.
format
(
line_nb
,
line
,
err
))
gene
=
Gene
(
gene_code
,
gene
=
Gene
(
gene_code
,
gene_id
,
gene_id
,
protein_length
,
protein_length
,
...
@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd
...
@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd
secretion_system
.
genes
=
genes
secretion_system
.
genes
=
genes
secreton_db
.
save_doc
(
secretion_system
,
force_update
=
force_update
)
secreton_db
.
save_doc
(
secretion_system
,
force_update
=
force_update
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
argparse
import
argparse
import
sys
import
sys
...
@@ -194,7 +203,7 @@ if __name__ == '__main__':
...
@@ -194,7 +203,7 @@ if __name__ == '__main__':
return
user
,
password
return
user
,
password
usage
=
"""
usage
=
"""
%prog [options]
%
(
prog
)s
[options]
parse a file containing replicon informations and a file containing system informations
parse a file containing replicon informations and a file containing system informations
and fill a couchDB data base with these informations
and fill a couchDB data base with these informations
"""
"""
...
@@ -202,23 +211,19 @@ if __name__ == '__main__':
...
@@ -202,23 +211,19 @@ if __name__ == '__main__':
server_opt
=
parser
.
add_argument_group
(
title
=
"Server Options"
)
server_opt
=
parser
.
add_argument_group
(
title
=
"Server Options"
)
server_opt
.
add_argument
(
"-S"
,
"--server"
,
server_opt
.
add_argument
(
"-S"
,
"--server"
,
action
=
"store"
,
action
=
"store"
,
type
=
"string"
,
dest
=
"server_url"
,
dest
=
"server_url"
,
help
=
"the url of the couchDB server (with the port)"
)
help
=
"the url of the couchDB server (with the port)"
)
server_opt
.
add_argument
(
"-d"
,
"--database"
,
server_opt
.
add_argument
(
"-d"
,
"--database"
,
action
=
"store"
,
action
=
"store"
,
type
=
"string"
,
dest
=
"db_name"
,
dest
=
"db_name"
,
help
=
"the name of the data base"
)
help
=
"the name of the data base"
)
parsing_opt
=
parser
.
add_argument_group
(
title
=
"Parsing Options"
)
parsing_opt
=
parser
.
add_argument_group
(
title
=
"Parsing Options"
)
parsing_opt
.
add_argument
(
"-r"
,
"--replicon"
,
parsing_opt
.
add_argument
(
"-r"
,
"--replicon"
,
action
=
"store"
,
action
=
"store"
,
type
=
"string"
,
dest
=
"replicon_path"
,
dest
=
"replicon_path"
,
help
=
"the path to the replicon file to parse"
)
help
=
"the path to the replicon file to parse"
)
parsing_opt
.
add_argument
(
"-s"
,
"--system"
,
parsing_opt
.
add_argument
(
"-s"
,
"--system"
,
action
=
"store"
,
action
=
"store"
,
type
=
"string"
,
dest
=
"system_path"
,
dest
=
"system_path"
,
help
=
"the path to the system secretion file to parse"
)
help
=
"the path to the system secretion file to parse"
)
parsing_opt
.
add_argument
(
"-f"
,
"--force_update"
,
parsing_opt
.
add_argument
(
"-f"
,
"--force_update"
,
...
@@ -226,43 +231,41 @@ if __name__ == '__main__':
...
@@ -226,43 +231,41 @@ if __name__ == '__main__':
dest
=
"force_update"
,
dest
=
"force_update"
,
default
=
False
,
default
=
False
,
help
=
"insert document even if there is already a document with the same id (replace it)"
)
help
=
"insert document even if there is already a document with the same id (replace it)"
)
options
,
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
not
option
s
.
server_url
:
if
not
arg
s
.
server_url
:
print
(
"You must specify a server url"
,
file
=
sys
.
stderr
)
print
(
"You must specify a server url"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
db_name
:
if
not
arg
s
.
db_name
:
print
(
"You must specify a data base name"
,
file
=
sys
.
stderr
)
print
(
"You must specify a data base name"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
replicon_path
:
if
not
arg
s
.
replicon_path
:
print
(
"You must specify the path to the replicon information file"
,
file
=
sys
.
stderr
)
print
(
"You must specify the path to the replicon information file"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
option
s
.
system_path
:
if
not
arg
s
.
system_path
:
print
(
"You must specify the path to the secretion system information file"
,
file
=
sys
.
stderr
)
print
(
"You must specify the path to the secretion system information file"
,
file
=
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
parser
.
print_help
(
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
replicon_db
=
replicon_parser
(
option
s
.
replicon_path
)
replicon_db
=
replicon_parser
(
arg
s
.
replicon_path
)
system_db
=
system_parser
(
option
s
.
system_path
)
system_db
=
system_parser
(
arg
s
.
system_path
)
try_again
=
0
try_again
=
0
while
True
:
while
True
:
user
,
password
=
get_credentials
()
user
,
password
=
get_credentials
()
try
:
try
:
fill_db
(
option
s
.
server_url
,
option
s
.
db_name
,
user
,
password
,
fill_db
(
arg
s
.
server_url
,
arg
s
.
db_name
,
user
,
password
,
replicon_db
,
system_db
,
force_update
=
option
s
.
force_update
)
replicon_db
,
system_db
,
force_update
=
arg
s
.
force_update
)
break
break
except
restkit
.
errors
.
Unauthorized
as
err
:
except
restkit
.
errors
.
Unauthorized
as
err
:
print
(
"Bad authentication, try again"
,
file
=
sys
.
stderr
)
try_again
+=
1
try_again
+=
1
if
try_again
>
2
:
if
try_again
>
2
:
sys
.
exit
(
"Authentication failure"
)
sys
.
exit
(
"Authentication failure"
)
except
Exception
,
err
:
except
Exception
as
err
:
print
(
err
,
file
=
sys
.
stderr
)
sys
.
exit
(
2
)
sys
.
exit
(
2
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment