Commit 4246cc08 authored by Hervé Ménager's avatar Hervé Ménager

Merge branch 'master' of https://projets.pasteur.fr/git/bioweb

parents 9f978d8d 5861ea4e
#! /usr/bin/env python
# import re
import sys
from pprint import pprint
#---- globals
#---- PACKAGES KEYS
# keys every package description must provide
pack_mandatory_keys = [
    'NAME',
    'CATEGORIES',
    'DESCRIPTION',
    'VERSION',
]
# optional package keys with the default inserted when they are missing
pack_other_keys = {
    'HOME': '',
    'SOURCE': '',
    'AUTHORS': [],
    'HTMLDOCS': [],
    'MANPAGES': [],
    'REF': [],
}
# gensoft specific package keys and their defaults
pack_gensoft_keys = {
    'HISTORY': [],
    'LIBRARY': '',
    'PRIVATE': '',
    'RESTRICT': '',
    'ORIGIN': '',
}
# accessory package keys and their defaults
pack_accessory_keys = {
    'MAINTAINER': '',
    'LICENSE': '',
    'LANGUAGE': [],
}
#---- PROGRAMS KEYS
# keys every program description must provide
prog_mandatory_keys = [
    'NAME',
]
# optional program keys with the default inserted when they are missing
prog_other_keys = {
    'DESCRIPTION': '',
    'CATEGORIES': [],
    'MANPAGES': [],
    'HTMLDOCS': [],
}
# gensoft specific program keys and their defaults
prog_gensoft_keys = {
    'PRIVATE': '',
    'RESTRICT': '',
}
# mobyle specific program keys and their defaults
prog_mobyle_keys = {
    'WEB': '',
}
# accessory program keys; named prog_* so that this table does not rebind
# the package-level pack_accessory_keys defined in the PACKAGES section
prog_accessory_keys = {
    'USE': [],
}
def get_doi(item):
    '''
    extract the identifier part (doi, pmid, pmcid) from a reference entry

    item: one reference entry, as a string
    returns reference, doi
      - reference: text located before the first identifier flag,
        surrounding whitespace removed
      - doi: text from the first identifier flag up to the end of the
        entry, lower-cased, surrounding whitespace and dots removed
    when no identifier flag is found, returns item unchanged and None
    '''
    import re
    tmp = item.lower()
    # A flag only counts as a stand-alone token (not preceded by a word
    # character, not followed by a letter) so that words merely containing
    # 'doi' are not mistaken for an identifier.
    # re.search returns the leftmost match: the split happens at the first
    # identifier, so the citation never keeps a doi when a pmid/pmcid
    # follows it.
    match = re.search(r'\b(?:doi|pmid|pmcid)(?![a-z])', tmp)
    if match is None:
        return item, None
    idx = match.start()
    citation = item[:idx].strip()
    # remove trailing dot if any
    doi = tmp[idx:].strip().strip('.')
    return citation, doi
def get_value(data):
    '''
    for internal use
    split a biodocs "SECTION.TAG: value" line
    return TAG name (section prefix removed), associated value
    both are stripped of surrounding whitespace
    raises ValueError when the part before the first colon is not of the
    form SECTION.TAG
    '''
    # only the first colon separates the tag from its value:
    # colons may appear in the value itself
    head, _, value = data.partition(':')
    parts = head.strip().split('.')
    if len(parts) != 2:
        raise ValueError("malformed biodocs tag: %r" % head)
    # strip the tag so that "PACK.NAME : x" gives 'NAME', not 'NAME '
    return parts[1].strip(), value.strip()
def get_pack(fh):
    '''
    package specific definition entries parser

    fh: file object positioned at the start of the package section; it is
        read with readline() and must support tell() and seek()
    reading stops at end of file or at the first line starting with 'PROG';
    that line is pushed back so that the next read returns it again.
    blank lines and lines starting with '#' are ignored, every other line
    is converted from ISO-8859-1 to UTF-8 before being interpreted.
    an untagged line is appended to the value of the latest stored tag
    when that value is a list; untagged lines following a skipped tag are
    dropped.

    returns a dictionary
    key = tag name without the 'PACK.' prefix
    value = string or list of strings, depending on the tag
    exits with status 1 on an unknown PACK tag or on an untagged line that
    cannot be attached to a list valued tag
    '''
    #---- skipped values
    skipped = ('PACK.USE', 'PACK.MAINTAINER')
    #---- One line one value entries
    # NOTE(review): PACK.MANPAGES is stored as a plain string although its
    # default in pack_other_keys is a list -- confirm the intended type
    single_value = ('PACK.NAME', 'PACK.HOME', 'PACK.SOURCE', 'PACK.LICENSE',
                    'PACK.LIBRARY', 'PACK.PRIVATE', 'PACK.RESTRICT',
                    'PACK.MANPAGES', 'PACK.ORIGIN')
    #---- One line multi values entries, split on whitespace
    word_list = ('PACK.VERSION', 'PACK.CATEGORIES', 'PACK.LANGUAGE')
    #---- entries stored as a list holding the whole value
    line_list = ('PACK.HTMLDOCS', 'PACK.DESCRIPTION', 'PACK.REF',
                 'PACK.HISTORY')
    res = {}
    current = None    # key of the latest stored tag, if any
    skipping = False  # True while the latest tag seen is a skipped one
    #---- deal with package definitions.
    while True:
        pos = fh.tell()
        line = fh.readline()
        if not line:
            break
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        line = line.decode('iso-8859-1').encode('utf-8')
        if line.startswith('PROG'):
            # programs section reached: give the line back to the caller
            fh.seek(pos, 0)
            break
        elif line.startswith(skipped):
            current = None
            skipping = True
        elif line.startswith(single_value):
            k, v = get_value(line)
            res[k] = v
            current = k
            skipping = False
        elif line.startswith('PACK.AUTHORS'):
            k, v = get_value(line)
            res[k] = v.split(',')
            current = k
            skipping = False
        elif line.startswith(word_list):
            k, v = get_value(line)
            res[k] = v.split()
            current = k
            skipping = False
        elif line.startswith(line_list):
            k, v = get_value(line)
            res[k] = [v]
            current = k
            skipping = False
        #---- problems to fix, report
        elif line.startswith('PACK'):
            print >> sys.stderr, "unknown PACK tag:", line
            sys.exit(1)
        elif skipping:
            # continuation of a skipped tag: must not leak into the
            # previously stored tag
            continue
        elif current is not None and isinstance(res[current], list):
            res[current].append(line)
        else:
            # no list valued tag to attach the line to: report it instead
            # of failing with a NameError / AttributeError
            print >> sys.stderr, "unexpected line outside of any PACK tag:", line
            sys.exit(1)
    return res
def get_progs(fh):
    '''
    program entries parser

    fh: file object positioned at the start of the programs section; it is
        read with readline() up to end of file
    program descriptions are separated by blank lines; lines starting with
    '#' are ignored. like in the package section, every line is converted
    from ISO-8859-1 to UTF-8 before being interpreted.
    an untagged line is appended to the value of the latest stored tag of
    the current program when that value is a list; untagged lines following
    a skipped tag are dropped.

    returns a list of programs description, one dictionary per program
    key = tag name without the 'PROG.' prefix
    value = string or list of strings, depending on the tag
    exits with status 1 on an unknown PROG tag, on any PACK tag, or on an
    untagged line that cannot be attached to a list valued tag
    '''
    #---- skipped values
    skipped = ('PROG.USE', 'PROG.PRIVATE')
    #---- entries stored as a list holding the whole value
    line_list = ('PROG.DESCRIPTION', 'PROG.HTMLDOCS', 'PROG.MANPAGES',
                 'PROG.WEB')
    prog_lst = []
    #---- deal with prog definitions.
    prog = {}
    current = None    # key of the latest stored tag of the current program
    skipping = False  # True while the latest tag seen is a skipped one
    line = fh.readline()
    while line:
        line = line.strip()
        # same transcoding as the package section, so that both parts of
        # the file come out in the same encoding
        line = line.decode('iso-8859-1').encode('utf-8')
        if not line:
            if prog: # avoid empty progs based on multiple empty lines
                prog_lst.append(prog)
            prog = {}
            # a new program starts: forget the previous program's tag
            current = None
            skipping = False
        elif line.startswith(skipped):
            current = None
            skipping = True
        elif line.startswith('#'):
            pass
        elif line.startswith('PROG.NAME'):
            k, v = get_value(line)
            prog[k] = v
            current = k
            skipping = False
        elif line.startswith('PROG.CATEGORIES'):
            k, v = get_value(line)
            prog[k] = v.split()
            current = k
            skipping = False
        elif line.startswith(line_list):
            k, v = get_value(line)
            prog[k] = [v]
            current = k
            skipping = False
        #---- problems to fix, report
        elif line.startswith('PROG'):
            print >> sys.stderr, "unknown PROG tag:", line
            sys.exit(1)
        elif line.startswith('PACK'):
            print >> sys.stderr, "unknown PACK tag PROGS section:", line
            sys.exit(1)
        elif skipping:
            # continuation of a skipped tag: must not leak into the
            # previously stored tag
            pass
        elif current is not None and isinstance(prog[current], list):
            prog[current].append(line)
        else:
            # no list valued tag to attach the line to: report it instead
            # of failing with a NameError / KeyError / AttributeError
            print >> sys.stderr, "unexpected line outside of any PROG tag:", line
            sys.exit(1)
        line = fh.readline()
    if prog:
        prog_lst.append(prog)
    return prog_lst
def pack_consolidate(datas):
    '''
    fill missing entries on pack
    inplace modification

    datas: package description dictionary
    exits with status 1 when a key of pack_mandatory_keys is missing
    keys of pack_other_keys and pack_gensoft_keys that are missing are
    inserted with their default value
    '''
    #--- check for package mandatory keys
    for key in pack_mandatory_keys:
        if key not in datas:
            # NAME itself may be the missing key: do not index datas blindly
            name = datas.get('NAME', '<unnamed package>')
            print >> sys.stderr, name, 'missing PACK mandatory key', key
            sys.exit(1)
    #--- silently insert missing packages keys
    for defaults in (pack_other_keys, pack_gensoft_keys):
        for key, default_val in defaults.items():
            if key in datas:
                continue
            # give each package its own list, otherwise filling one
            # package would also modify the shared default table
            if isinstance(default_val, list):
                default_val = list(default_val)
            datas[key] = default_val
def progs_consolidate(datas):
    '''
    fills prog descriptions entries with missing info
    inplace modification

    datas: list of program description dictionaries; empty ones are left
           untouched
    exits with status 1 when a key of prog_mandatory_keys is missing
    keys of prog_other_keys and prog_gensoft_keys that are missing are
    inserted with their default value
    '''
    for prog in datas:
        if not prog:
            continue
        #--- check for programs mandatory keys
        for key in prog_mandatory_keys:
            if key not in prog:
                print >> sys.stderr, 'missing PROG mandatory key', key
                sys.exit(1)
        #--- silently insert missing programs keys
        for defaults in (prog_other_keys, prog_gensoft_keys):
            for key, default_value in defaults.items():
                if key in prog:
                    continue
                # give each program its own list, otherwise all programs
                # (and the default table) would share a single list
                if isinstance(default_value, list):
                    default_value = list(default_value)
                prog[key] = default_value
def Parser(fh):
    '''
    parsing of BIODOCS files

    fh: file object opened on a BIODOCS file
    returns pack, progs
      - pack: package description as a dictionary
        Key == BIODOC tag
        Val == content
      - progs: list of program descriptions, one dictionary per program
    a warning is written on stderr when no program is described and the
    package has no LIBRARY tag
    WARNING: NO semantic verification
    '''
    pack = get_pack(fh)
    progs = get_progs(fh)
    if not progs and 'LIBRARY' not in pack:
        # '>>' is required to send the message to stderr; without it the
        # file object itself is printed on stdout
        print >> sys.stderr, "no programs description found"
    pack_consolidate(pack)
    progs_consolidate(progs)
    return pack, progs
if __name__ == '__main__':
    # parse every BIODOCS file given on the command line and dump the result
    for biodocs in sys.argv[1:]:
        print(biodocs)
        # 'with' closes the file even when the parser raises or exits
        with open(biodocs) as fh:
            pack, progs = Parser(fh)
        print("PACKAGE")
        pprint(pack)
        print("PROGRAMS")
        pprint(progs)
# Build, install and uninstall rules for the $(NAM) script.
NAM=biodocs2mongo
PREFIX=/local/gensoft2/adm
BIN=$(PREFIX)/bin
PYMODULEDIR=$(PREFIX)/share/gensoft/pymodules
DAT=$(PREFIX)/share/gensoft/$(NAM)

# None of these targets creates a file bearing its own name; declaring them
# phony keeps them working even if a file called e.g. "clean" shows up.
.PHONY: build install clean uninstall

# Point the shebang of every *.py at the interpreter under $(BIN) (follows
# PREFIX when it is overridden), then generate $(NAM) from $(NAM).py with
# the DAT and PYMODULEDIR assignments set to the install locations.
build:
	sed -i -e 's,^\#!.*python,\#! $(BIN)/python,' *.py
	sed -e 's|^DAT=.*|DAT="$(DAT)"|' \
	    -e 's|^PYMODULEDIR=.*|PYMODULEDIR="$(PYMODULEDIR)"|' $(NAM).py > $(NAM)

# Install the generated script and its configuration file, then clean up.
install: build
	test -d $(DAT) || mkdir -p $(DAT)
	install -m 0775 $(NAM) $(BIN)
	install -m 0664 config.cfg $(DAT)
	$(MAKE) clean

# Remove the generated script and compiled Python files.
clean:
	rm -f $(NAM)
	rm -f *.pyc

# Remove the installed script and its data directory.
uninstall:
	rm -f $(BIN)/$(NAM) $(DAT)/*
	rm -rf $(DAT)
This diff is collapsed.
import pymongo
import sys
import ssl
# MongoDB server location
# NOTE(review): the [MONGO] configuration section shipped with this code
# reads HOST: bioweb-prod.web.pasteur.fr -- confirm which spelling is right
HOST = 'bioweb-pro.web.pasteur.fr'
PORT = 27017

# severity levels handed to error(): non-zero means exit
FATAL = 1
WARN = 0

# log() writes only when VERBOSE is true
VERBOSE = 1
LOGFH = sys.stdout
ERRFH = sys.stderr

# defaults of get_DB()
JOURNALING = True # implies w=1
WRITECONCERN = 1
DB_DEF = 'bioweb'

#client.max_message_size # size in bytes of max message
BULKSIZE = 10000
def error(exit_val, *msg):
    '''
    report a problem on ERRFH

    exit_val: 0 for a warning; any other value is an error and is used as
              the process exit status
    msg: message parts, converted with str() and joined with ' - '
    returns None for a warning, does not return for an error
    '''
    head = ['Warning', 'Error']
    # pick the label from the truth value: indexing head with the exit
    # status itself fails for any status greater than 1
    label = head[1 if exit_val else 0]
    print >> ERRFH, "%s: %s" % (label, " - ".join(map(str, msg)))
    if exit_val:
        sys.exit(exit_val)
    return None
def log(*msg):
    '''
    write the message parts, converted with str() and joined with a
    space, on LOGFH; does nothing unless VERBOSE is true
    '''
    if not VERBOSE:
        return
    text = ' '.join([str(part) for part in msg])
    print >> LOGFH, "%s" % text
def get_DB(host, port, db_name=DB_DEF, j=JOURNALING, w=WRITECONCERN, ssl=True, ssl_cert_reqs=ssl.CERT_NONE ):
    '''
    open a client on the mongodb server and return a database handle

    host, port: location of the mongodb server
    db_name: name of the database to return
    j, w: accepted for the callers' sake but currently not handed to the
          client
    ssl, ssl_cert_reqs: TLS settings handed to pymongo.MongoClient
    returns client[db_name]
    a pymongo ConnectionFailure is reported through error(FATAL, ...)
    '''
    log('connect to', host, 'on port:', port)
    try:
        # inside this function `ssl` is the boolean parameter, not the ssl
        # module: hand over the parameters instead of looking up
        # ssl.CERT_NONE again, which fails on a bool
        client = pymongo.MongoClient(host, port, ssl=ssl, ssl_cert_reqs=ssl_cert_reqs) #, j=JOURNALING)
    except pymongo.errors.ConnectionFailure as err:
        error(FATAL, "mongodb %s/%s" %(host, port), err)
    # diagnostic output goes through log() so that it honours VERBOSE
    log('client:', client)
    return client[db_name]
def undot(key_name):
    '''return key_name with every '.' turned into '@' '''
    pieces = key_name.split('.')
    return '@'.join(pieces)
def redot(key_name):
    '''
    inverse of undot: return key_name with every '@' turned back into '.'
    '''
    return key_name.replace('@', '.')
if __name__ == '__main__':
    # connect with the module-level HOST/PORT and the get_DB() defaults
    db = get_DB(HOST, PORT)
    # handle on the 'catalog' collection of that database
    col = db['catalog']
# General settings: identifier separator character and verbosity flag.
[DEFAULT]
ID_SEPARATOR: @
VERBOSE: 1
# Address of the Mobyle server.
[MOBYLE]
MOBYLEURL: http://mobyle.pasteur.fr
# MongoDB settings: server host and port, database and collection names,
# journaling and write concern.
[MONGO]
HOST: bioweb-prod.web.pasteur.fr
PORT: 27017
DB: bioweb
COL: catalog
JOURNALING: True
WRITECONCERN: 1
# Name of the BIODOCS file.
[BIODOCS]
BIODOCS_FILE: BIODOCS.yaml
# file generated by edam_mapper.py
EDAM-0000288 operation_0288
EDAM-0000289 operation_0289
EDAM-0000425 operation_0425
EDAM-0000219 topic_0219
EDAM-0000164 topic_0164
EDAM-0001813 operation_1813
EDAM-0000283 operation_0283
EDAM-0000284 operation_0284
EDAM-0000285 operation_0285
EDAM-0000286 operation_0286
EDAM-0000287 operation_0287
EDAM-0000569 operation_0569
EDAM-0003023 operation_3023
EDAM-0000659 topic_0659
EDAM-0000415 operation_0415
EDAM-0002428 operation_2428
EDAM-0000416 operation_0416
EDAM-0000419 operation_0419
EDAM-0000418 operation_0418
EDAM-0000563 operation_0563
EDAM-0000562 operation_0562
EDAM-0002422 operation_2422
EDAM-0000564 operation_0564
EDAM-0002420 operation_2420
EDAM-0002421 operation_2421
EDAM-0002277 topic_2277
EDAM-0002275 topic_2275
EDAM-0000203 topic_0203
EDAM-0000369 operation_0369
EDAM-0000499 operation_0499
EDAM-0000491 operation_0491
EDAM-0000490 operation_0490
EDAM-0000365 operation_0365
EDAM-0000492 operation_0492
EDAM-0000495 operation_0495
EDAM-0000362 operation_0362
EDAM-0000361 operation_0361
EDAM-0000496 operation_0496
EDAM-0000798 topic_0798
EDAM-0002506 operation_2506
EDAM-0000426 operation_0426
EDAM-0000128 topic_0128
EDAM-0003088 operation_3088
EDAM-0000797 topic_0797
EDAM-0000622 topic_0622
EDAM-0000301 operation_0301
EDAM-0000298 operation_0298
EDAM-0000623 topic_0623
EDAM-0000110 topic_0110
EDAM-0000247 operation_0247
EDAM-0000291 operation_0291
EDAM-0000290 operation_0290
EDAM-0000114 topic_0114
EDAM-0000296 operation_0296
EDAM-0000160 topic_0160
EDAM-0003087 operation_3087
EDAM-0002438 operation_2438
EDAM-0002476 operation_2476
EDAM-0003081 operation_3081
EDAM-0000545 operation_0545
EDAM-0000554 operation_0554
EDAM-0002437 operation_2437
EDAM-0000555 operation_0555
EDAM-0000323 operation_0323
EDAM-0002241 operation_2241
EDAM-0000194 topic_0194
EDAM-0000461 operation_0461
EDAM-0000196 topic_0196
EDAM-0000515 operation_0515
EDAM-0000516 operation_0516
EDAM-0000092 topic_0092
EDAM-0000356 operation_0356
EDAM-0003096 operation_3096
EDAM-0000354 operation_0354
EDAM-0000624 topic_0624
EDAM-0002089 operation_2089
EDAM-0000199 topic_0199
EDAM-0003056 topic_3056
EDAM-0000255 operation_0255
EDAM-0000108 topic_0108
EDAM-0000350 operation_0350
EDAM-0000102 topic_0102
EDAM-0000567 operation_0567
EDAM-0002429 operation_2429
EDAM-0000107 topic_0107
EDAM-0000511 operation_0511
EDAM-0002501 operation_2501
EDAM-0000100 topic_0100
EDAM-0000226 operation_0226
EDAM-0000227 operation_0227
EDAM-0000483 operation_0483
EDAM-0000220 topic_0220
EDAM-0000477 operation_0477
EDAM-0000182 topic_0182
EDAM-0000347 operation_0347
EDAM-0002520 operation_2520
EDAM-0002448 operation_2448
EDAM-0000340 operation_0340
EDAM-0000258 operation_0258
EDAM-0000470 operation_0470
EDAM-0000565 operation_0565
EDAM-0000188 topic_0188
EDAM-0002423 operation_2423
EDAM-0002412 operation_2412
EDAM-0003074 topic_3074
EDAM-0000552 operation_0552
EDAM-0000566 operation_0566
EDAM-0002439 operation_2439
EDAM-0003041 topic_3041
EDAM-0000655 topic_0655
EDAM-0000432 operation_0432
EDAM-0000137 topic_0137
EDAM-0000325 operation_0325
EDAM-0002515 operation_2515
EDAM-0001812 operation_1812
EDAM-0000238 operation_0238
EDAM-0000526 operation_0526
EDAM-0002466 operation_2466
EDAM-0000231 operation_0231
EDAM-0000230 operation_0230
EDAM-0000233 operation_0233
EDAM-0000232 operation_0232
EDAM-0000767 topic_0767
EDAM-0000237 operation_0237
EDAM-0000236 operation_0236
EDAM-0000330 operation_0330
EDAM-0000239 operation_0239
EDAM-0000539 operation_0539
EDAM-0000334 operation_0334
EDAM-0000335 operation_0335
EDAM-0000336 operation_0336
EDAM-0000337 operation_0337
EDAM-0000338 operation_0338
EDAM-0000533 operation_0533
EDAM-0000448 operation_0448
EDAM-0000368 operation_0368
EDAM-0002452 operation_2452
EDAM-0002451 operation_2451
EDAM-0000367 operation_0367
EDAM-0002120 operation_2120
EDAM-0002269 topic_2269
EDAM-0002575 operation_2575
EDAM-0000121 topic_0121
EDAM-0000123 topic_0123
EDAM-0000364 operation_0364
EDAM-0002489 operation_2489
EDAM-0000363 operation_0363
EDAM-0000478 operation_0478
EDAM-0003062 topic_3062
EDAM-0000637 topic_0637
EDAM-0002508 operation_2508
EDAM-0002238 operation_2238
EDAM-0000242 operation_0242
EDAM-0000004 operation_0004
EDAM-0000455 operation_0455
EDAM-0002502 operation_2502
EDAM-0000451 operation_0451
EDAM-0000450 operation_0450
EDAM-0002507 operation_2507
EDAM-0000452 operation_0452
EDAM-0000525 operation_0525
EDAM-0000524 operation_0524
EDAM-0000527 operation_0527
EDAM-0003053 topic_3053
EDAM-0000249 operation_0249
EDAM-0000523 operation_0523
EDAM-0000324 operation_0324
EDAM-0000082 topic_0082
EDAM-0000245 operation_0245
EDAM-0000080 topic_0080
EDAM-0000081 topic_0081
EDAM-0000240 operation_0240
EDAM-0000084 topic_0084
EDAM-0000085 topic_0085
EDAM-0000099 topic_0099
EDAM-0002426 operation_2426
EDAM-0002816 topic_2816
EDAM-0000694 topic_0694
EDAM-0002946 operation_2946
EDAM-0002443 operation_2443
EDAM-0000282 operation_0282
EDAM-0002814 topic_2814
EDAM-0000259 operation_0259
EDAM-0000313 operation_0313
EDAM-0003316 topic_3316
EDAM-0000398 operation_0398
EDAM-0000154 topic_0154
EDAM-0000740 topic_0740
EDAM-0000157 topic_0157
EDAM-0000747 topic_0747
EDAM-0000319 operation_0319
EDAM-0000749 topic_0749
EDAM-0000748 topic_0748
EDAM-0000422 operation_0422
EDAM-0000315 operation_0315
EDAM-0000158 topic_0158
EDAM-0000159 topic_0159
EDAM-0000310 operation_0310
EDAM-0000311 operation_0311
EDAM-0000550 operation_0550
EDAM-0000094 topic_0094
EDAM-0000097 topic_0097
EDAM-0000621 topic_0621
EDAM-0000091 topic_0091
EDAM-0000090 topic_0090
EDAM-0002121 operation_2121
EDAM-0000625 topic_0625
EDAM-0000558 operation_0558
EDAM-0000256 operation_0256
EDAM-0002284 operation_2284
EDAM-0000253 operation_0253
EDAM-0002478 operation_2478
EDAM-0000250 operation_0250
EDAM-0000447 operation_0447
EDAM-0000551 operation_0551
EDAM-0003024 operation_3024
EDAM-0000820 topic_0820
EDAM-0000821 topic_0821
EDAM-0002488 operation_2488
EDAM-0000380 operation_0380
EDAM-0002454 operation_2454
EDAM-0000235 operation_0235
EDAM-0003055 topic_3055
EDAM-0000475 operation_0475
EDAM-0002480 operation_2480
EDAM-0000370 operation_0370
EDAM-0000417 operation_0417
EDAM-0000278 operation_0278
EDAM-0000309 operation_0309
EDAM-0000308 operation_0308
EDAM-0000438 operation_0438
EDAM-0000140 topic_0140
EDAM-0000433 operation_0433
EDAM-0000300 operation_0300
EDAM-0000431 operation_0431
EDAM-0000430 operation_0430
EDAM-0003072 topic_3072
EDAM-0000436 operation_0436
EDAM-0000326 operation_0326