Commit 9ac9ba7f authored by Bertrand NÉRON
Browse files

fix pep8 and python2.7 syntax

parent 2319b154
...@@ -5,7 +5,7 @@ Created on 27 dec. 2011 ...@@ -5,7 +5,7 @@ Created on 27 dec. 2011
@author: Bertrand Néron @author: Bertrand Néron
""" """
from __future__ import print_function
from collections import namedtuple from collections import namedtuple
from couchdbkit.client import Server from couchdbkit.client import Server
from couchdbkit.exceptions import ResourceNotFound from couchdbkit.exceptions import ResourceNotFound
...@@ -14,7 +14,7 @@ from restkit import Resource, BasicAuth ...@@ -14,7 +14,7 @@ from restkit import Resource, BasicAuth
import restkit.errors import restkit.errors
def replicon_parser( replicon_data ): def replicon_parser(replicon_data):
""" """
parse a file containing the informations about replicons parse a file containing the informations about replicons
@param replicon_data: the path of replicon information file @param replicon_data: the path of replicon information file
...@@ -23,27 +23,31 @@ def replicon_parser( replicon_data ): ...@@ -23,27 +23,31 @@ def replicon_parser( replicon_data ):
@rtype: dict @rtype: dict
""" """
replicon_db = {} replicon_db = {}
Replicon_info = namedtuple('Replicon_info', 'name, taxid, strain, taxonomy, type') Replicon_info = namedtuple('Replicon_info', ('name', 'taxid', 'strain', 'taxonomy', 'type'))
with open( replicon_data , 'r' ) as replicon_file : with open(replicon_data, 'r') as replicon_file:
for line in replicon_file: for line in replicon_file:
if line[0] != '#': if line[0] != '#':
line = line.strip() line = line.strip()
fields = line.split( '\t') fields = line.split('\t')
if fields[0] in replicon_db: if fields[0] in replicon_db:
raise KeyError( "duplicate replicon:" + fields[0]) raise KeyError("duplicate replicon:" + fields[0])
else: else:
try: try:
replicon_db[ fields[0] ] = Replicon_info( fields[0] , int(fields[1]) , fields[2] , fields[3].split('; ') , fields[4]) replicon_db[fields[0]] = Replicon_info(fields[0],
except Exception, err: int(fields[1]),
raise Exception( "Error during parsing line :"+line ) fields[2],
#remove ending dot or semi-colon from the last term of taxonnomy fields[3].split('; '),
if( replicon_db[ fields[0] ].taxonomy[-1].endswith('.') or replicon_db[ fields[0] ].taxonomy[-1].endswith(';')): fields[4])
replicon_db[ fields[0] ].taxonomy[-1] = replicon_db[ fields[0] ].taxonomy[-1][:-1] except Exception as err:
raise Exception("Error during parsing line: {0}\n{1}".format(line, err))
# remove ending dot or semi-colon from the last term of taxonomy
tax_last_char = replicon_db[fields[0]].taxonomy[-1]
if tax_last_char.endswith('.') or tax_last_char.endswith(';'):
replicon_db[fields[0]].taxonomy[-1] = replicon_db[fields[0]].taxonomy[-1][:-1]
return replicon_db return replicon_db
def system_parser(system_data):
def system_parser( system_data ):
""" """
@param system_data: the path of secretion system information file @param system_data: the path of secretion system information file
@type system_data: string @type system_data: string
...@@ -51,44 +55,46 @@ def system_parser( system_data ): ...@@ -51,44 +55,46 @@ def system_parser( system_data ):
@rtype: dict @rtype: dict
""" """
system_db = {} system_db = {}
System_info = namedtuple('System_info', 'code, T3SS_family, replicon, genes' ) System_info = namedtuple('System_info', 'code, T3SS_family, replicon, genes')
Gene = namedtuple( 'Gene', 'code, id, protein_length, strand, begin, end, match, full_score, e_value, best_domain_score, best_domain_evalue, c_value, coverage_profile, match_begin, match_end, name, description') Gene = namedtuple('Gene', ('code', 'id', 'protein_length', 'strand', 'begin', 'end', 'match', 'full_score',
'e_value', 'best_domain_score', 'best_domain_evalue', 'c_value', 'coverage_profile',
'match_begin', 'match_end', 'name', 'description'))
with open( system_data , 'r' ) as system_file : with open(system_data, 'r') as system_file:
for line in system_file: for line in system_file:
if line[0] != '#': if line[0] != '#':
line = line.strip() line = line.strip()
fields = line.split( '\t') fields = line.split('\t')
if fields[0] in system_db: if fields[0] in system_db:
raise KeyError( "duplicate replicon:" + fields[0]) raise KeyError("duplicate replicon:" + fields[0])
else: else:
gene = Gene( fields[0],# code gene = Gene(fields[0], # code
fields[1],# id fields[1], # id
int(fields[2]),# protein_length int(fields[2]), # protein_length
fields[3] if fields[3] != '-' else None,# strand fields[3] if fields[3] != '-' else None, # strand
int(fields[4]) if fields[4] != '-' else None ,# begin int(fields[4]) if fields[4] != '-' else None, # begin
int(fields[5]) if fields[5] != '-' else None,# end int(fields[5]) if fields[5] != '-' else None, # end
fields[6] if fields[6] != '-' else None,# match fields[6] if fields[6] != '-' else None, # match
float(fields[7].replace( ',' , '.')) if fields[7] != '-' else None , # full_score float(fields[7].replace(',', '.')) if fields[7] != '-' else None, # full_score
float(fields[8].replace( ',' , '.')) if fields[8] != '-' else None , # e_value float(fields[8].replace(',', '.')) if fields[8] != '-' else None, # e_value
float(fields[9].replace( ',' , '.')) if fields[9] != '-' else None , # best_domain_score float(fields[9].replace(',', '.')) if fields[9] != '-' else None, # best_domain_score
float(fields[10].replace( ',' , '.')) if fields[10] != '-' else None ,# best_domain_evalue float(fields[10].replace(',', '.')) if fields[10] != '-' else None, # best_domain_evalue
float(fields[11].replace( ',' , '.')) if fields[11] != '-' else None ,# c_value float(fields[11].replace(',', '.')) if fields[11] != '-' else None, # c_value
float(fields[12].replace( ',' , '.')) if fields[12] != '-' else None ,# coverage_profile float(fields[12].replace(',', '.')) if fields[12] != '-' else None, # coverage_profile
int(fields[13]) if fields[13] != '-' else None ,# match_begin int(fields[13]) if fields[13] != '-' else None, # match_begin
int(fields[14]) if fields[14] != '-' else None ,# match_end int(fields[14]) if fields[14] != '-' else None, # match_end
fields[18] if fields[18] else None, # name fields[18] if fields[18] else None, # name
fields[19] if fields[19] else None, # description fields[19] if fields[19] else None, # description
) )
if fields[16] in system_db: if fields[16] in system_db:
if gene.code in system_db[ fields[16] ]: if gene.code in system_db[fields[16]]:
raise KeyError( "duplicate gene:" + fields[16]) raise KeyError("duplicate gene:" + fields[16])
else: else:
#append this gene to System_info genes # append this gene to System_info genes
system_db[ fields[16] ].genes[ gene.code ] = gene system_db[fields[16]].genes[gene.code] = gene
else: else:
#create a new Sysem_info entry # create a new System_info entry
system_db[ fields[16] ] = System_info( fields[16] , fields[17] , fields[15] , genes = { gene.code : gene } ) system_db[fields[16]] = System_info(fields[16], fields[17], fields[15], genes={gene.code: gene})
return system_db return system_db
...@@ -96,17 +102,17 @@ from couchdbkit.schema import Document ...@@ -96,17 +102,17 @@ from couchdbkit.schema import Document
from couchdbkit.schema.properties import * from couchdbkit.schema.properties import *
class SecretionSystem( Document ): class SecretionSystem(Document):
""" """
a representation of a secretion System to be use with couchdb a representation of a secretion System to be use with couchdb
""" """
code = StringProperty( required=True ) code = StringProperty(required=True)
T3SS_family = StringProperty() T3SS_family = StringProperty()
replicon = DictProperty() replicon = DictProperty()
genes = ListProperty() genes = ListProperty()
def fill_db( server_uri, db_name, user, passwd, replicon_db , system_db , force_update = False): def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_update=False):
""" """
@param server_uri: the url of the couchdb server (with port) @param server_uri: the url of the couchdb server (with port)
@type server_uri: string @type server_uri: string
...@@ -119,124 +125,117 @@ def fill_db( server_uri, db_name, user, passwd, replicon_db , system_db , force_ ...@@ -119,124 +125,117 @@ def fill_db( server_uri, db_name, user, passwd, replicon_db , system_db , force_
@param force_update: if true force the entry to be updated even if the _rev number is not provided @param force_update: if true force the entry to be updated even if the _rev number is not provided
@type force_update: boolean @type force_update: boolean
""" """
auth = BasicAuth(user , passwd ) auth = BasicAuth(user, passwd )
resource = CouchdbResource( server_uri , filters=[auth]) resource = CouchdbResource(server_uri, filters=[auth])
server = Server( resource_instance= resource ) server = Server(resource_instance=resource)
secreton_db = server.get_or_create_db( db_name ) secreton_db = server.get_or_create_db(db_name)
system_codes = system_db.keys() system_codes = system_db.keys()
system_codes.sort() system_codes.sort()
for syst_code in system_codes: for syst_code in system_codes:
system = system_db[syst_code] system = system_db[syst_code]
replicon = replicon_db[ system.replicon ] replicon = replicon_db[system.replicon]
secretion_system = SecretionSystem() secretion_system = SecretionSystem()
secretion_system._id = system.code secretion_system._id = system.code
secretion_system.code = system.code secretion_system.code = system.code
secretion_system.T3SS_family = system.T3SS_family secretion_system.T3SS_family = system.T3SS_family
secretion_system.replicon = { 'name' : replicon.name , secretion_system.replicon = {'name': replicon.name,
'taxid' : replicon.taxid, 'taxid': replicon.taxid,
'strain' : replicon.strain, 'strain': replicon.strain,
'taxonomy' : replicon.taxonomy, 'taxonomy': replicon.taxonomy,
'type' : replicon.type 'type': replicon.type
} }
genes_code = system.genes.keys() genes_code = system.genes.keys()
genes_code.sort() genes_code.sort()
genes = [] genes = []
for gene_code in genes_code: for gene_code in genes_code:
gene = system.genes[ gene_code ] gene = system.genes[gene_code]
g = {} g = {}
for field in gene._fields: for field in gene._fields:
if getattr(gene , field ) is not None: if getattr(gene, field) is not None:
g[ field ]= getattr( gene , field ) g[field] = getattr(gene, field)
genes.append( g ) genes.append(g)
secretion_system.genes = genes secretion_system.genes = genes
secreton_db.save_doc( secretion_system , force_update= force_update ) secreton_db.save_doc(secretion_system, force_update=force_update)
if __name__ == '__main__': if __name__ == '__main__':
from optparse import OptionParser , OptionGroup import argparse
import sys import sys
import getpass import getpass
def get_credentials(): def get_credentials():
user = raw_input('login: ') user = raw_input('login: ')
password = getpass.getpass('password: ') password = getpass.getpass('password: ')
return user, password return user, password
usage=""" usage = """
%prog [options] %prog [options]
parse a file containing replicon informations and a file containing system informations parse a file containing replicon informations and a file containing system informations
and fill a couchDB data base with these informations and fill a couchDB data base with these informations
""" """
parser = OptionParser( usage= usage ) parser = argparse.ArgumentParser(usage=usage)
server_opt = OptionGroup(parser, "Server Options") server_opt = parser.add_argument_group(title="Server Options")
server_opt.add_option( "-S" , "--server" , server_opt.add_argument("-S", "--server",
action = "store", action="store",
type= "string" , type="string",
dest = "server_url" , dest="server_url",
help = "the url of the couchDB server (with the port)") help="the url of the couchDB server (with the port)")
server_opt.add_option( "-d" , "--database" , server_opt.add_argument("-d", "--database",
action = "store", action="store",
type= "string" , type="string",
dest = "db_name" , dest="db_name",
help = "the name of the data base") help="the name of the data base")
parser.add_option_group( server_opt ) parsing_opt = parser.add_argument_group(title="Parsing Options")
parsing_opt.add_argument("-r", "--replicon",
parsing_opt = OptionGroup(parser, "Parsing Options") action="store",
parsing_opt.add_option( "-r" , "--replicon" , type="string",
action = "store", dest="replicon_path",
type= "string" , help="the path to the replicon file to parse")
dest = "replicon_path" , parsing_opt.add_argument("-s", "--system",
help = "the path to the replicon file to parse") action="store",
parsing_opt.add_option( "-s" , "--system" , type="string",
action = "store", dest="system_path",
type= "string" , help="the path to the system secretion file to parse")
dest = "system_path" , parsing_opt.add_argument("-f", "--force_update",
help = "the path to the system secretion file to parse") action="store_true",
parsing_opt.add_option( "-f" , "--force_update" , dest="force_update",
action= "store_true", default=False,
dest = "force_update" , help="")
default = False,
help = "")
parser.add_option_group( parsing_opt )
options, args = parser.parse_args() options, args = parser.parse_args()
if not options.server_url: if not options.server_url:
print >> sys.stderr , "You must specify a server url" print("You must specify a server url", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.db_name: if not options.db_name:
print >> sys.stderr , "You must specify a data base name" print("You must specify a data base name", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.replicon_path: if not options.replicon_path:
print >> sys.stderr , "You must specify the path to the replicon information file" print("You must specify the path to the replicon information file", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
if not options.system_path: if not options.system_path:
print >> sys.stderr , "You must specify the path to the secretion system information file" print("You must specify the path to the secretion system information file", file=sys.stderr)
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
replicon_db = replicon_parser(options.replicon_path)
replicon_db = replicon_parser( options.replicon_path ) system_db = system_parser(options.system_path)
system_db = system_parser( options.system_path )
try_again = 0 try_again = 0
while True: while True:
user, password = get_credentials() user, password = get_credentials()
try: try:
fill_db(options.server_url, options.db_name, user, password, replicon_db, system_db, force_update = options.force_update) fill_db(options.server_url, options.db_name, user, password, replicon_db, system_db,
force_update=options.force_update)
break break
except restkit.errors.Unauthorized, err: except restkit.errors.Unauthorized as err:
print >> sys.stderr, "Bad authentication, try again" print("Bad authentication, try again", file=sys.stderr)
try_again += 1 try_again += 1
if try_again > 2: if try_again > 2:
sys.exit("Authentication failure") sys.exit("Authentication failure")
except Exception, err:
print >> sys.stderr, err
sys.exit(2)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment