Commit 91e60a1c authored by Bertrand  NÉRON's avatar Bertrand NÉRON
Browse files

fix a lot of small bugs

use argparse instead of optparse
raise an error when system-id is not specified
parent 7110fefd
...@@ -26,9 +26,11 @@ def replicon_parser(replicon_data): ...@@ -26,9 +26,11 @@ def replicon_parser(replicon_data):
:rtype: dict :rtype: dict
""" """
replicon_db = {} replicon_db = {}
Replicon_info = namedtuple('Replicon_info', ('name', 'taxid', 'strain', 'taxonomy', 'type')) Replicon_info = namedtuple('Replicon_info', ('name', 'ncbi_id', 'taxid', 'strain', 'taxonomy', 'type'))
with open(replicon_data, 'r') as replicon_file: with open(replicon_data, 'r') as replicon_file:
line_nb = 0
for line in replicon_file: for line in replicon_file:
line_nb += 1
if not line.startswith('#'): if not line.startswith('#'):
line = line.strip() line = line.strip()
fields = line.split('\t') fields = line.split('\t')
...@@ -43,12 +45,13 @@ def replicon_parser(replicon_data): ...@@ -43,12 +45,13 @@ def replicon_parser(replicon_data):
taxonomy = fields[4].split('; ') taxonomy = fields[4].split('; ')
# remove ending dot or semi-colon from the last term of taxonnomy # remove ending dot or semi-colon from the last term of taxonnomy
if taxonomy[-1].endswith('.') or taxonomy[-1].endswith(';'): if taxonomy[-1].endswith('.') or taxonomy[-1].endswith(';'):
taxonomy[-1] = taxonomy[-1][:-1] taxonomy = taxonomy[-1][:-1]
replicon_type = fields[5] replicon_type = fields[5]
replicon_db[replicon_id] = Replicon_info(replicon_id, ncbi_id, taxid, strain, replicon_db[replicon_id] = Replicon_info(replicon_id, ncbi_id, taxid, strain,
taxonomy, replicon_type) taxonomy, replicon_type)
except Exception as err: except Exception as err:
raise Exception("Error during parsing line : {0} : {1}".format(line, err)) raise Exception("Error during parsing line {0}: {1} : {2}".format(line_nb, line, err))
return replicon_db return replicon_db
def system_parser(system_data): def system_parser(system_data):
...@@ -66,14 +69,17 @@ def system_parser(system_data): ...@@ -66,14 +69,17 @@ def system_parser(system_data):
'score', 'i_evalue', 'coverage', 'match_begin', 'match_end', 'name', 'description') 'score', 'i_evalue', 'coverage', 'match_begin', 'match_end', 'name', 'description')
) )
with open(system_data, 'r') as system_file : with open(system_data, 'r') as system_file:
line_nb = 0
for line in system_file: for line in system_file:
line_nb += 1
if line[0] != '#': if line[0] != '#':
line = line.strip() line = line.strip()
fields = line.split('\t') fields = line.split('\t')
gene_code = fields[0] gene_code = fields[0]
if gene_code in system_db: if gene_code in system_db:
raise KeyError("duplicate replicon:" + fields[0]) raise KeyError("duplicate replicon:" + fields[0])
try:
gene_id = fields[1] gene_id = fields[1]
protein_length = int(fields[2]) protein_length = int(fields[2])
strand = fields[3] if fields[3] != '-' else None strand = fields[3] if fields[3] != '-' else None
...@@ -88,10 +94,14 @@ def system_parser(system_data): ...@@ -88,10 +94,14 @@ def system_parser(system_data):
match_end = int(fields[11]) if fields[11] != '-' else None match_end = int(fields[11]) if fields[11] != '-' else None
replicon_id = fields[12] replicon_id = fields[12]
predicted_system = fields[13] if fields[13] != '-' else None predicted_system = fields[13] if fields[13] != '-' else None
system_id = fields[14] if fields[14] != '-' else None system_id = fields[14]
if system_id == '-':
raise RuntimeError("System-Id is empty")
system_status = fields[15] if fields[15] != '-' else None system_status = fields[15] if fields[15] != '-' else None
gene_name = fields[16] if fields[16] else None gene_name = fields[16] if fields[16] else None
description = fields[17] if fields[17] else None description = fields[17] if fields[17] else None
except Exception as err:
raise RuntimeError("Error during parsing line {0}: {1} : {2}".format(line_nb, line, err))
gene = Gene(gene_code, gene = Gene(gene_code,
gene_id, gene_id,
protein_length, protein_length,
...@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd ...@@ -182,7 +192,6 @@ def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_upd
secretion_system.genes = genes secretion_system.genes = genes
secreton_db.save_doc(secretion_system, force_update=force_update) secreton_db.save_doc(secretion_system, force_update=force_update)
if __name__ == '__main__': if __name__ == '__main__':
import argparse import argparse
import sys import sys
...@@ -194,7 +203,7 @@ if __name__ == '__main__': ...@@ -194,7 +203,7 @@ if __name__ == '__main__':
return user, password return user, password
usage = """ usage = """
%prog [options] %(prog)s [options]
parse a file containing replicon informations and a file containing system informations parse a file containing replicon informations and a file containing system informations
and fill a couchDB data base with these informations and fill a couchDB data base with these informations
""" """
...@@ -202,23 +211,19 @@ if __name__ == '__main__': ...@@ -202,23 +211,19 @@ if __name__ == '__main__':
server_opt = parser.add_argument_group(title="Server Options") server_opt = parser.add_argument_group(title="Server Options")
server_opt.add_argument("-S", "--server", server_opt.add_argument("-S", "--server",
action="store", action="store",
type="string",
dest="server_url", dest="server_url",
help="the url of the couchDB server (with the port)") help="the url of the couchDB server (with the port)")
server_opt.add_argument("-d", "--database", server_opt.add_argument("-d", "--database",
action="store", action="store",
type="string",
dest="db_name", dest="db_name",
help="the name of the data base") help="the name of the data base")
parsing_opt = parser.add_argument_group(title="Parsing Options") parsing_opt = parser.add_argument_group(title="Parsing Options")
parsing_opt.add_argument("-r", "--replicon", parsing_opt.add_argument("-r", "--replicon",
action="store", action="store",
type="string",
dest="replicon_path", dest="replicon_path",
help="the path to the replicon file to parse") help="the path to the replicon file to parse")
parsing_opt.add_argument("-s", "--system", parsing_opt.add_argument("-s", "--system",
action="store", action="store",
type="string",
dest="system_path", dest="system_path",
help="the path to the system secretion file to parse") help="the path to the system secretion file to parse")
parsing_opt.add_argument("-f", "--force_update", parsing_opt.add_argument("-f", "--force_update",
...@@ -226,43 +231,41 @@ if __name__ == '__main__': ...@@ -226,43 +231,41 @@ if __name__ == '__main__':
dest="force_update", dest="force_update",
default=False, default=False,
help="insert document even if there is already a document with the same id (replace it)") help="insert document even if there is already a document with the same id (replace it)")
options, args = parser.parse_args() args = parser.parse_args()
if not options.server_url: if not args.server_url:
print("You must specify a server url", file=sys.stderr) print("You must specify a server url", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.db_name: if not args.db_name:
print("You must specify a data base name", file=sys.stderr) print("You must specify a data base name", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.replicon_path: if not args.replicon_path:
print("You must specify the path to the replicon information file", file=sys.stderr) print("You must specify the path to the replicon information file", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.system_path: if not args.system_path:
print("You must specify the path to the secretion system information file", file=sys.stderr) print("You must specify the path to the secretion system information file", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
replicon_db = replicon_parser(options.replicon_path) replicon_db = replicon_parser(args.replicon_path)
system_db = system_parser(options.system_path) system_db = system_parser(args.system_path)
try_again = 0 try_again = 0
while True: while True:
user, password = get_credentials() user, password = get_credentials()
try: try:
fill_db(options.server_url, options.db_name, user, password, fill_db(args.server_url, args.db_name, user, password,
replicon_db, system_db, force_update=options.force_update) replicon_db, system_db, force_update=args.force_update)
break break
except restkit.errors.Unauthorized as err: except restkit.errors.Unauthorized as err:
print("Bad authentication, try again", file=sys.stderr)
try_again += 1 try_again += 1
if try_again > 2: if try_again > 2:
sys.exit("Authentication failure") sys.exit("Authentication failure")
except Exception, err: except Exception as err:
print(err, file=sys.stderr)
sys.exit(2) sys.exit(2)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment