Skip to content
Snippets Groups Projects
Commit 9ac9ba7f authored by Bertrand  NÉRON's avatar Bertrand NÉRON
Browse files

fix pep8 and python2.7 syntax

parent 2319b154
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,7 @@ Created on 27 dec. 2011
@author: Bertrand Néron
"""
from __future__ import print_function
from collections import namedtuple
from couchdbkit.client import Server
from couchdbkit.exceptions import ResourceNotFound
......@@ -14,7 +14,7 @@ from restkit import Resource, BasicAuth
import restkit.errors
def replicon_parser(replicon_data):
    """
    parse a file containing the informations about replicons

    Each record is one tab separated line read as:
    name, taxid, strain, taxonomy, type.
    The taxonomy is split on '; ' and a trailing '.' or ';' is removed
    from its last term. Lines starting with '#' and blank lines are skipped.

    @param replicon_data: the path of replicon information file
    @type replicon_data: string
    @return: the replicons indexed by their name, each value is a
             Replicon_info namedtuple (name, taxid, strain, taxonomy, type)
    @rtype: dict
    @raise KeyError: if the same replicon name appears twice
    @raise Exception: if a line cannot be parsed (missing field, non integer taxid)
    """
    replicon_db = {}
    Replicon_info = namedtuple('Replicon_info', ('name', 'taxid', 'strain', 'taxonomy', 'type'))
    with open(replicon_data, 'r') as replicon_file:
        for line in replicon_file:
            # the comment test is done on the raw line, before stripping
            if line[0] == '#':
                continue
            line = line.strip()
            if not line:
                # a blank line carries no record, it must not be reported as a parsing error
                continue
            fields = line.split('\t')
            name = fields[0]
            if name in replicon_db:
                raise KeyError("duplicate replicon:" + name)
            try:
                taxonomy = fields[3].split('; ')
                # remove ending dot or semi-colon from the last term of taxonomy
                if taxonomy[-1].endswith(('.', ';')):
                    taxonomy[-1] = taxonomy[-1][:-1]
                replicon_db[name] = Replicon_info(name,
                                                  int(fields[1]),
                                                  fields[2],
                                                  taxonomy,
                                                  fields[4])
            except Exception as err:
                raise Exception("Error during parsing line: {0}\n{1}".format(line, err))
    return replicon_db
def system_parser(system_data):
    """
    parse a file containing the informations about secretion systems

    Each record is one tab separated line describing one gene. Columns 0-14
    and 18-19 describe the gene itself, column 15 is the replicon, column 16
    the system code and column 17 the T3SS family. A '-' in an optional
    gene column means "no value" and is stored as None.
    Lines starting with '#' and blank lines are skipped.

    @param system_data: the path of secretion system information file
    @type system_data: string
    @return: the systems indexed by their code, each value is a System_info
             namedtuple (code, T3SS_family, replicon, genes) where genes is
             a dict of Gene namedtuples indexed by gene code
    @rtype: dict
    @raise KeyError: if a gene code appears twice in the same system
    """
    def as_text(value):
        return None if value == '-' else value

    def as_int(value):
        return None if value == '-' else int(value)

    def as_float(value):
        # a comma is accepted as decimal separator
        return None if value == '-' else float(value.replace(',', '.'))

    system_db = {}
    System_info = namedtuple('System_info', 'code, T3SS_family, replicon, genes')
    Gene = namedtuple('Gene', ('code', 'id', 'protein_length', 'strand', 'begin', 'end', 'match', 'full_score',
                               'e_value', 'best_domain_score', 'best_domain_evalue', 'c_value', 'coverage_profile',
                               'match_begin', 'match_end', 'name', 'description'))
    with open(system_data, 'r') as system_file:
        for line in system_file:
            if line[0] == '#':
                continue
            line = line.strip()
            if not line:
                continue
            fields = line.split('\t')
            # strip() also removes trailing tabs, so empty name/description
            # columns disappear from the split: restore them as empty strings
            if 18 <= len(fields) < 20:
                fields.extend([''] * (20 - len(fields)))
            # NOTE(review): this compares a gene code (column 0) with the system codes
            # used as keys, it looks copied from replicon_parser - confirm the intent
            if fields[0] in system_db:
                raise KeyError("duplicate replicon:" + fields[0])
            gene = Gene(fields[0],             # code
                        fields[1],             # id
                        int(fields[2]),        # protein_length
                        as_text(fields[3]),    # strand
                        as_int(fields[4]),     # begin
                        as_int(fields[5]),     # end
                        as_text(fields[6]),    # match
                        as_float(fields[7]),   # full_score
                        as_float(fields[8]),   # e_value
                        as_float(fields[9]),   # best_domain_score
                        as_float(fields[10]),  # best_domain_evalue
                        as_float(fields[11]),  # c_value
                        as_float(fields[12]),  # coverage_profile
                        as_int(fields[13]),    # match_begin
                        as_int(fields[14]),    # match_end
                        fields[18] if fields[18] else None,  # name
                        fields[19] if fields[19] else None,  # description
                        )
            system_code = fields[16]
            if system_code in system_db:
                system = system_db[system_code]
                # the lookup must be done in the genes dict: "in" on the namedtuple
                # itself compares against its four values and never finds a gene
                if gene.code in system.genes:
                    raise KeyError("duplicate gene:" + system_code)
                # append this gene to System_info genes
                system.genes[gene.code] = gene
            else:
                # create a new System_info entry
                system_db[system_code] = System_info(system_code, fields[17], fields[15],
                                                     genes={gene.code: gene})
    return system_db
......@@ -96,17 +102,17 @@ from couchdbkit.schema import Document
from couchdbkit.schema.properties import *
class SecretionSystem(Document):
    """
    a representation of a secretion system to be used with couchdb
    """
    # the code of the system, mandatory
    code = StringProperty(required=True)
    T3SS_family = StringProperty()
    # the informations about the replicon which carries the system
    replicon = DictProperty()
    # the genes of the system, one entry per gene
    genes = ListProperty()
def fill_db(server_uri, db_name, user, passwd, replicon_db, system_db, force_update=False):
    """
    save one SecretionSystem document per system in a couchDB data base.
    The data base is created if it does not exist. The systems, and the genes
    inside each system, are handled in the sorted order of their codes.

    @param server_uri: the url of the couchdb server (with port)
    @type server_uri: string
    @param db_name: the name of the data base
    @type db_name: string
    @param user: the login used for the basic authentication
    @type user: string
    @param passwd: the password used for the basic authentication
    @type passwd: string
    @param replicon_db: the replicons indexed by their name (see replicon_parser)
    @type replicon_db: dict
    @param system_db: the systems indexed by their code (see system_parser)
    @type system_db: dict
    @param force_update: if true force the entry to be updated even if the _rev number is not provided
    @type force_update: boolean
    @raise KeyError: if a system refers to a replicon which is not in replicon_db
    """
    auth = BasicAuth(user, passwd)
    resource = CouchdbResource(server_uri, filters=[auth])
    server = Server(resource_instance=resource)
    secreton_db = server.get_or_create_db(db_name)

    # sorted() works on a dict with python 2 and 3,
    # whereas keys() followed by sort() only works with python 2
    for syst_code in sorted(system_db):
        system = system_db[syst_code]
        replicon = replicon_db[system.replicon]
        secretion_system = SecretionSystem()
        # the system code is used as document identifier
        secretion_system._id = system.code
        secretion_system.code = system.code
        secretion_system.T3SS_family = system.T3SS_family
        secretion_system.replicon = {'name': replicon.name,
                                     'taxid': replicon.taxid,
                                     'strain': replicon.strain,
                                     'taxonomy': replicon.taxonomy,
                                     'type': replicon.type
                                     }
        genes = []
        for gene_code in sorted(system.genes):
            gene = system.genes[gene_code]
            # the fields without value (None) are not stored in the document
            genes.append({field: value for field, value in zip(gene._fields, gene) if value is not None})
        secretion_system.genes = genes
        secreton_db.save_doc(secretion_system, force_update=force_update)
if __name__ == '__main__':
    import argparse
    import sys
    import getpass

    def get_credentials():
        """
        ask the login and the password on the terminal
        (the password is not echoed)

        @return: the login and the password
        @rtype: tuple of 2 strings
        """
        user = raw_input('login: ')
        password = getpass.getpass('password: ')
        return user, password

    # argparse uses %(prog)s (not the optparse %prog) in the usage string
    parser = argparse.ArgumentParser(
        usage="%(prog)s [options]",
        description="parse a file containing replicon informations and a file containing system informations "
                    "and fill a couchDB data base with these informations")

    # no "type" argument: argparse expects a callable, not the optparse type name "string",
    # and the values are strings by default
    server_opt = parser.add_argument_group(title="Server Options")
    server_opt.add_argument("-S", "--server",
                            action="store",
                            dest="server_url",
                            help="the url of the couchDB server (with the port)")
    server_opt.add_argument("-d", "--database",
                            action="store",
                            dest="db_name",
                            help="the name of the data base")

    parsing_opt = parser.add_argument_group(title="Parsing Options")
    parsing_opt.add_argument("-r", "--replicon",
                             action="store",
                             dest="replicon_path",
                             help="the path to the replicon file to parse")
    parsing_opt.add_argument("-s", "--system",
                             action="store",
                             dest="system_path",
                             help="the path to the system secretion file to parse")
    parsing_opt.add_argument("-f", "--force_update",
                             action="store_true",
                             dest="force_update",
                             default=False,
                             help="force the entry to be updated even if the _rev number is not provided")

    # parse_args returns a single Namespace (optparse returned an (options, args) pair)
    options = parser.parse_args()

    # the mandatory options are checked by hand to keep the messages and the exit status 1
    mandatory = (('server_url', "You must specify a server url"),
                 ('db_name', "You must specify a data base name"),
                 ('replicon_path', "You must specify the path to the replicon information file"),
                 ('system_path', "You must specify the path to the secretion system information file"),
                 )
    for dest, message in mandatory:
        if not getattr(options, dest):
            print(message, file=sys.stderr)
            parser.print_help(sys.stderr)
            sys.exit(1)

    replicon_db = replicon_parser(options.replicon_path)
    system_db = system_parser(options.system_path)

    # the credentials are asked again after a bad authentication, 3 attempts at most
    try_again = 0
    while True:
        user, password = get_credentials()
        try:
            fill_db(options.server_url, options.db_name, user, password, replicon_db, system_db,
                    force_update=options.force_update)
            break
        except restkit.errors.Unauthorized:
            print("Bad authentication, try again", file=sys.stderr)
            try_again += 1
            if try_again > 2:
                sys.exit("Authentication failure")
        except Exception as err:
            # any other error is fatal: report it and exit with status 2
            print(err, file=sys.stderr)
            sys.exit(2)
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment