Commit 45d40a98 authored by Eric  DEVEAUD's avatar Eric DEVEAUD

merge port to BIODOCS.yaml

parents 1cbc9b1a 9187ebd5
#! /usr/bin/env python
# import re
import sys
from pprint import pprint
#---- globals
#---- PACKAGES KEYS
# Tags a package description must carry; pack_consolidate() aborts when one
# of them is absent.
pack_mandatory_keys = [
    'NAME',
    'CATEGORIES',
    'DESCRIPTION',
    'VERSION',
]
# Optional package tags with the default inserted by pack_consolidate().
pack_other_keys = {
    'HOME': '',
    'SOURCE': '',
    'AUTHORS': [],
    'HTMLDOCS': [],
    'MANPAGES': [],
    'REF': [],
}
# Second table of optional package tags, also filled by pack_consolidate().
pack_gensoft_keys = {
    'HISTORY': [],
    'LIBRARY': '',
    'PRIVATE': '',
    'RESTRICT': '',
    'ORIGIN': '',
}
# Not referenced by the functions visible in this module.
pack_accessory_keys = {
    'MAINTAINER': '',
    'LICENSE': '',
    'LANGUAGE': [],
}
#---- PROGRAMS KEYS
# Tags a program entry must carry; progs_consolidate() aborts otherwise.
prog_mandatory_keys = [
    'NAME',
]
# Optional program tags with the default inserted by progs_consolidate().
prog_other_keys = {
    'DESCRIPTION': '',
    'CATEGORIES': [],
    'MANPAGES': [],
    'HTMLDOCS': [],
}
# Second table of optional program tags, also filled by progs_consolidate().
prog_gensoft_keys = {
    'PRIVATE': '',
    'RESTRICT': '',
}
# Not referenced by the functions visible in this module.
prog_mobyle_keys = {
    'WEB': '',
}
# NOTE(review): this rebinds pack_accessory_keys and discards the
# MAINTAINER/LICENSE/LANGUAGE table defined above. Sitting in the PROGRAMS
# section it was presumably meant to be prog_accessory_keys -- confirm.
pack_accessory_keys = {
    'USE': [],
}
def get_doi(item):
    '''
    extract the identifier part (doi, pmid or pmcid) from a reference entry

    item: reference text, optionally ending with one or more identifiers

    returns (citation, doi):
      citation: text located before the first identifier, stripped
      doi: lower-cased text from the first identifier to the end of the
           entry, stripped, without leading/trailing dots;
           None when no identifier flag is present (citation is then the
           unmodified item)
    '''
    DOIFLAGS = ['doi', 'pmid', 'pmcid']
    # matching is case insensitive
    lowered = item.lower()
    positions = [lowered.find(flag) for flag in DOIFLAGS]
    positions = [pos for pos in positions if pos >= 0]
    if not positions:
        return item, None
    # split at the earliest identifier so that no identifier text is left
    # inside the citation when several of them are present
    idx = min(positions)
    citation = item[:idx].strip()
    # remove surrounding blanks, then the trailing dot if any
    doi = lowered[idx:].strip().strip('.')
    return citation, doi
def get_value(data):
    '''
    for internal use
    split a biodocs "SECTION.TAG: value" line

    data: one biodocs line, e.g. "PACK.NAME: blast"

    returns (TAG name, associated value), both stripped of surrounding blanks
    the SECTION prefix ("PACK", "PROG") is dropped

    exits with status 1, after reporting on stderr, when the part located
    before the first colon is not of the form SECTION.TAG
    '''
    # only the first colon separates tag and value, colons may appear in value
    tag, _, value = data.partition(':')
    # blanks before the colon must not end up in the key ("PACK.NAME : x")
    tag = tag.strip()
    value = value.strip()
    try:
        _, name = tag.split('.')
    except ValueError as msg:
        sys.stderr.write("%s\n" % msg)
        sys.stderr.write("%s\n" % tag)
        sys.exit(1)
    return name, value
def get_pack(fh):
    '''
    package specific definition entries parser

    fh: seekable file handle positioned at the start of the PACK section

    reads lines until end of file or until the first line starting with
    'PROG'; in the latter case the handle is rewound to the start of that
    line so the programs parser can read it again

    returns a dictionary TAG -> value, where value is
      - a string for one line one value entries
      - a list for AUTHORS (comma separated), VERSION/CATEGORIES/LANGUAGE
        (blank separated) and HTMLDOCS/DESCRIPTION/REF/HISTORY (one item
        per line)
    lines carrying no tag are appended to the list of the last parsed tag

    exits with status 1, after reporting on stderr, on unknown PACK tag or
    on a continuation line that has no list valued entry to extend
    '''
    skipped = ('PACK.USE', 'PACK.MAINTAINER')
    single_value = ('PACK.NAME', 'PACK.HOME', 'PACK.SOURCE', 'PACK.LICENSE',
                    'PACK.LIBRARY', 'PACK.PRIVATE', 'PACK.RESTRICT',
                    'PACK.MANPAGES', 'PACK.ORIGIN')
    blank_separated = ('PACK.VERSION', 'PACK.CATEGORIES', 'PACK.LANGUAGE')
    one_item_per_line = ('PACK.HTMLDOCS', 'PACK.DESCRIPTION', 'PACK.REF',
                         'PACK.HISTORY')
    res = {}
    # last parsed tag, target of the continuation lines
    key = None
    #---- deal with package definitions.
    while True:
        pos = fh.tell()
        line = fh.readline()
        if not line:
            break
        line = line.strip()
        # biodocs files are iso-8859-1 encoded: byte strings are converted,
        # already decoded text is left alone instead of crashing on .decode()
        if isinstance(line, bytes):
            line = line.decode('iso-8859-1')
            if str is bytes:
                # python 2: keep working on byte strings, utf-8 encoded
                line = line.encode('utf-8')
        if not line or line.startswith('#'):
            continue
        if line.startswith('PROG'):
            # give the line back to the programs parser
            fh.seek(pos, 0)
            break
        #--- skip values
        elif line.startswith(skipped):
            pass
        #---- One line one value entries
        elif line.startswith(single_value):
            key, value = get_value(line)
            res[key] = value
        elif line.startswith('PACK.AUTHORS'):
            key, value = get_value(line)
            res[key] = value.split(',')
        #---- One line multi values entries
        elif line.startswith(blank_separated):
            key, value = get_value(line)
            res[key] = value.split()
        #---- multi line entries
        elif line.startswith(one_item_per_line):
            key, value = get_value(line)
            res[key] = [value]
        #---- problems to fix, report
        elif line.startswith('PACK'):
            sys.stderr.write("unknown PACK tag: %s\n" % line)
            sys.exit(1)
        else:
            # continuation line: needs a list valued entry to extend
            target = res.get(key)
            if not isinstance(target, list):
                sys.stderr.write(
                    "continuation line without multi line PACK entry: %s\n"
                    % line)
                sys.exit(1)
            target.append(line)
    return res
def get_progs(fh):
    '''
    program entries parser

    fh: file handle positioned at the start of the PROG section

    program entries are separated by empty lines
    returns a list of programs description, one dictionary TAG -> value
    per program:
      - NAME is a string
      - CATEGORIES is a list (blank separated values)
      - DESCRIPTION, HTMLDOCS, MANPAGES and WEB are lists, one item per line
    PROG.USE, PROG.PRIVATE and comment lines are ignored
    lines carrying no tag are appended to the list of the last parsed tag

    exits with status 1, after reporting on stderr, on unknown PROG tag, on
    PACK tag, or on a continuation line that has no list valued entry of the
    current program to extend

    NOTE(review): unlike get_pack, lines are not converted from iso-8859-1
    to utf-8 here -- confirm whether this is intended.
    '''
    ignored = ('PROG.USE', 'PROG.PRIVATE', '#')
    one_item_per_line = ('PROG.DESCRIPTION', 'PROG.HTMLDOCS', 'PROG.MANPAGES',
                         'PROG.WEB')
    prog_lst = []
    #---- deal with prog definitions.
    prog = {}
    # last parsed tag, target of the continuation lines
    key = None
    line = fh.readline()
    while line:
        line = line.strip()
        if not line:
            if prog:  # avoid empty progs based on multiple empty lines
                prog_lst.append(prog)
                prog = {}
        elif line.startswith(ignored):
            pass
        elif line.startswith('PROG.NAME'):
            key, value = get_value(line)
            prog[key] = value
        elif line.startswith('PROG.CATEGORIES'):
            key, value = get_value(line)
            prog[key] = value.split()
        elif line.startswith(one_item_per_line):
            key, value = get_value(line)
            prog[key] = [value]
        #---- problems to fix, report
        elif line.startswith('PROG'):
            sys.stderr.write("unknown PROG tag: %s\n" % line)
            sys.exit(1)
        elif line.startswith('PACK'):
            sys.stderr.write("unknown PACK tag PROGS section: %s\n" % line)
            sys.exit(1)
        else:
            # continuation line: needs a list valued entry of the current
            # program to extend
            target = prog.get(key)
            if not isinstance(target, list):
                sys.stderr.write(
                    "continuation line without multi line PROG entry: %s\n"
                    % line)
                sys.exit(1)
            target.append(line)
        line = fh.readline()
    if prog:
        prog_lst.append(prog)
    return prog_lst
def pack_consolidate(datas):
    '''
    fill missing entries on pack
    inplace modification

    datas: package dictionary as returned by get_pack

    exits with status 1, after reporting on stderr, when one of
    pack_mandatory_keys is missing
    keys of pack_other_keys and pack_gensoft_keys that are absent are
    silently inserted with their default value
    '''
    #--- check for package mandatory keys
    for key in pack_mandatory_keys:
        if key not in datas:
            # NAME itself may be the missing key
            name = datas.get('NAME', '<unnamed package>')
            sys.stderr.write("%s missing PACK mandatory key %s\n" % (name, key))
            sys.exit(1)
    #--- silently insert missing packages keys
    for defaults in (pack_other_keys, pack_gensoft_keys):
        for key, default_val in defaults.items():
            if key not in datas:
                # list defaults are copied so that packages never share
                # (and mutate) the module level list
                if isinstance(default_val, list):
                    default_val = list(default_val)
                datas[key] = default_val
def progs_consolidate(datas):
    '''
    fills prog descriptions entries with missing info
    inplace modification

    datas: list of program dictionaries as returned by get_progs
           empty dictionaries are left untouched

    exits with status 1, after reporting on stderr, when a program lacks
    one of prog_mandatory_keys
    keys of prog_other_keys and prog_gensoft_keys that are absent are
    silently inserted with their default value
    '''
    for prog in datas:
        if not prog:
            continue
        #--- check for programs mandatory keys
        for key in prog_mandatory_keys:
            if key not in prog:
                sys.stderr.write("missing PROG mandatory key %s\n" % key)
                sys.exit(1)
        #--- silently insert missing programs keys
        for defaults in (prog_other_keys, prog_gensoft_keys):
            for key, default_value in defaults.items():
                if key not in prog:
                    # list defaults are copied so that programs never share
                    # (and mutate) the module level list
                    if isinstance(default_value, list):
                        default_value = list(default_value)
                    prog[key] = default_value
def Parser(fh):
    '''
    parsing of BIODOCS files

    fh: seekable file handle opened on a BIODOCS file

    returns (pack, progs):
      pack: dictionary, Key == BIODOC tag, Val == content
      progs: list of such dictionaries, one per program
    both are completed with default values for the missing optional tags

    a warning is written on stderr when no program is described and the
    package carries no LIBRARY tag
    WARNING: NO semantic verification
    '''
    pack = get_pack(fh)
    progs = get_progs(fh)
    if not progs and 'LIBRARY' not in pack:
        # goes to stderr: the previous statement lacked '>>' and printed
        # the stream object itself on stdout
        sys.stderr.write("no programs description found\n")
    pack_consolidate(pack)
    progs_consolidate(progs)
    return pack, progs
if __name__ == '__main__':
    # dump the parsed content of every BIODOCS file given on the command line
    for biodocs in sys.argv[1:]:
        print(biodocs)
        # the context manager closes the file even when parsing exits or raises
        with open(biodocs) as fh:
            pack, progs = Parser(fh)
        print("PACKAGE")
        pprint(pack)
        print("PROGRAMS")
        pprint(progs)
# Name of the generated script; it is built from $(NAM).py.
NAM=biodocs2mongo
# Installation layout, everything lives below PREFIX.
PREFIX=/local/gensoft2/adm
BIN=$(PREFIX)/bin
PYMODULEDIR=$(PREFIX)/share/gensoft/pymodules
DAT=$(PREFIX)/share/gensoft/$(NAM)
# build: rewrite, in place, the interpreter line of every *.py file, then
# generate $(NAM) from $(NAM).py with the lines starting with DAT= and
# PYMODULEDIR= replaced by the values defined above.
build:
sed -i -e 's,^\#!.*python,\#! /local/gensoft2/adm/bin/python,' *.py
sed -e 's|^DAT=.*|DAT="$(DAT)"|' \
-e 's|^PYMODULEDIR=.*|PYMODULEDIR="$(PYMODULEDIR)"|' $(NAM).py > $(NAM)
# install: create $(DAT) when missing, copy the script to $(BIN) and
# config.cfg to $(DAT), then remove the build products.
install: build
test -d $(DAT) || mkdir -p $(DAT)
install -m 0775 $(NAM) $(BIN)
install -m 0664 config.cfg $(DAT)
$(MAKE) clean
# clean: remove the generated script and the compiled python files.
clean:
rm -f $(NAM)
rm -f *.pyc
# uninstall: remove the installed script and the whole $(DAT) directory.
uninstall:
rm -f $(BIN)/$(NAM) $(DAT)/*
rm -rf $(DAT)
This diff is collapsed.
import pymongo
import sys
import ssl
# Default mongodb endpoint, as used by the __main__ section of this module.
# NOTE(review): the [MONGO] HOST entry of the configuration file names
# 'bioweb-prod.web.pasteur.fr' -- confirm which spelling is the right one.
HOST = 'bioweb-pro.web.pasteur.fr'
PORT = 27017

# Severity levels for error(): a true value makes error() exit with it.
FATAL = 1
WARN = 0

# log() only writes when VERBOSE is true.
VERBOSE = 1
# Streams written by log() and error() respectively.
LOGFH = sys.stdout
ERRFH = sys.stderr

# Defaults of the j, w and db_name parameters of get_DB().
JOURNALING = True  # implies w=1
WRITECONCERN = 1
DB_DEF = 'bioweb'

#client.max_message_size # size in bytes of max message
# Not referenced by the code visible in this module.
BULKSIZE = 10000
def error(exit_val, *msg):
    '''
    report a problem on ERRFH and exit when it is fatal

    exit_val: exit status; any true value is reported as 'Error' and makes
              the process exit with that status, a false value (WARN) is
              reported as 'Warning' and the function returns
    msg: message parts, converted with str() and joined with ' - '

    returns None
    '''
    # derived from truthiness: indexing a two items list with exit_val
    # raised IndexError for any exit status above 1
    head = 'Error' if exit_val else 'Warning'
    ERRFH.write("%s: %s\n" % (head, " - ".join(map(str, msg))))
    if exit_val:
        sys.exit(exit_val)
    return None
def log(*msg):
    '''
    write the message parts, converted with str() and separated by one
    blank, as one line on LOGFH
    nothing is written when VERBOSE is false
    '''
    if not VERBOSE:
        return
    text = ' '.join(str(part) for part in msg)
    LOGFH.write("%s\n" % text)
def get_DB(host, port, db_name=DB_DEF, j=JOURNALING, w=WRITECONCERN, ssl=True, ssl_cert_reqs=ssl.CERT_NONE):
    '''
    open a client on the mongodb server and return one of its databases

    host, port: server to reach
    db_name: name of the database to return
    j, w: accepted for interface compatibility, currently not forwarded to
          the client (NOTE(review): confirm whether they should be)
    ssl, ssl_cert_reqs: forwarded as is to pymongo.MongoClient

    returns client[db_name]
    a pymongo ConnectionFailure is reported through error(FATAL, ...),
    which exits
    '''
    log('connect to', host, 'on port:', port)
    try:
        # the parameters are forwarded: inside this function 'ssl' is the
        # boolean argument, not the ssl module, so ssl.CERT_NONE cannot be
        # evaluated here
        client = pymongo.MongoClient(host, port, ssl=ssl, ssl_cert_reqs=ssl_cert_reqs)  # , j=JOURNALING)
    except pymongo.errors.ConnectionFailure as err:
        error(FATAL, "mongodb %s/%s" % (host, port), err)
    # debugging trace, kept as is
    print(">>>>>>>>>>>>>>> %s" % (client,))
    return client[db_name]
def undot(key_name):
    '''
    return key_name with every '.' turned into '@'
    '''
    return '@'.join(key_name.split('.'))
def redot(key_name):
    '''
    return key_name with every '@' turned back into '.'
    (reverse of the '.' -> '@' substitution)
    '''
    return key_name.replace('@', '.')
if __name__ == "__main__":
    # connect with the module defaults and pick the 'catalog' collection
    db = get_DB(HOST, PORT)
    col = db["catalog"]
# General settings.
[DEFAULT]
ID_SEPARATOR: @
VERBOSE: 1
# Mobyle portal.
[MOBYLE]
MOBYLEURL: http://mobyle.pasteur.fr
# MongoDB connection: server, database, collection and write options.
# NOTE(review): HOST differs from the HOST constant of the python
# connection helper ('bioweb-pro.web.pasteur.fr') -- confirm which is right.
[MONGO]
HOST: bioweb-prod.web.pasteur.fr
PORT: 27017
DB: bioweb
COL: catalog
JOURNALING: True
WRITECONCERN: 1
# Name of the BIODOCS file.
[BIODOCS]
BIODOCS_FILE: BIODOCS.yaml
This diff is collapsed.
align_reorder/20111202
bambe/4.01
bigorf_extract/1.0
bionj/None
blast/2.2.21
blast/2.2.26
blast2seqids/1.4
blast2taxonomy/2.1
blastTaxoAnalysis/1.0
BMGE/1.0
boxshade/3.3.1
cap3/04-15-05
CBS/None
cif/0.2.2
Clustal-Omega/1.1.0
ClustalW/2.0.12
CodonW/1.4.4
comalign/None
combat/1.0
concatfasta/1.00
CONJscan-T4SSscan/None
consensus/2004-04-14
cosa/None
dca/1.1
dialign/2.2.1
dssp/2000
ELP/2.0
EMBOSS/6.3.1
extend_align/1.0
fasta/3.6
fastaRename/1.0
fastDNAml/1.2.2
fetchSequences/1.0
forest2consense/None
gblocks/0.91b
genscan/1.0
golden/1.1a
growthpred/v1.07
gruppi/1.0
hmmer/3.0
html4blast/1.7a
imogene/1.0-253
ktreedist/1.0
mafft/6.849
melting/4.2g
mfold/3.6
morePhyML/1.14
mreps/2.5
msa/2.1
msaprobs/0.9.4
mspcrunch/2.5
muscle/3.8.31
mview/1.49
newick-utils/1.6
njplot/20051109
nw_cat/GNU 7.4
pdb-lib/1.0
penncnv/2009.08.27
pftools/2.3
phylip/3.67
phyml/20130219.patch
pima/1.40
pratt/2.1
predator/2.1.2
primo/None
prose/0.02a
psort/981201
puzzle/5.2
quicktree/1.1
rbvotree/1.00
repeatoire/1.0
repeats/1.1
saps/07-01-1995
saxs_merge/r17125
scan_for_matches/97
seq-gen/1.3.2
sig/1.0
smile/1.47
squizz/0.99b
T3SSscan-FLAGscan/None
tacg/4.1.0
taxoptimizer/1.1
tipdate/1.2
toppred/1.10
treealign/None
trnascan/1.23
ViennaRNA/1.8.4
weighbor/1.2.1
wise2/2.2.0
xpound/None
xxr/3.03
# Package identifiers, one '<name>/<version>' string per entry.
# NOTE(review): 'NA' presumably stands for "no version available" -- confirm.
pack_info = [ 'align_reorder/20111202'
, 'bambe/4.01'
, 'bigorf_extract/1.0'
, 'bionj/NA'
# , 'blast/2.2.21'
, 'blast/2.2.26'
, 'blast2seqids/1.4'
, 'blast2taxonomy/2.1'
, 'blastTaxoAnalysis/1.0'
, 'BMGE/1.12'
, 'boxshade/3.3.1'
, 'cap3/04-15-05'
, 'tmhmm/NA'
, 'cif/0.2.2'
, 'Clustal-Omega/1.1.0'
, 'ClustalW/2.0.12'
, 'CodonW/1.4.4'
, 'comalign/NA'
, 'combat/1.0'
, 'concatfasta/1.00'
, 'consensus/2004-04-14'
, 'cosa/NA'
, 'dca/1.1'
, 'dialign/2.2.1'
, 'dssp/2000'
, 'ELP/2.0'
, 'EMBOSS/6.3.1'
, 'extend_align/1.0'
, 'fasta/3.6'
, 'fastaRename/1.0'
, 'fastDNAml/1.2.2'
, 'fetchSequences/1.0'
, 'forest2consense/NA'
, 'gblocks/0.91b'
, 'genscan/1.0'
, 'golden/1.1a'
, 'growthpred/v1.07'
, 'gruppi/1.0'
, 'hmmer/3.0'
, 'html4blast/1.7a'
, 'imogene/1.0-253'
, 'ktreedist/1.0'
, 'mafft/6.849'
, 'melting/4.2g'
, 'mfold/3.6'
, 'morePhyML/1.14'
, 'mreps/2.5'
, 'msa/2.1'
, 'msaprobs/0.9.4'
, 'mspcrunch/2.5'
, 'muscle/3.8.31'
, 'mview/1.49'
, 'newick-utils/1.6'
, 'njplot/20051109'
, 'nw_cat/GNU 7.4'
, 'pdb-lib/1.0'
, 'penncnv/2009.08.27'
, 'pftools/2.3'
, 'phylip/3.67'
, 'phyml/20130219.patch'
, 'pima/1.40'
, 'pratt/2.1'
, 'predator/2.1.2'
, 'primo/NA'
, 'prose/0.02a'
, 'psort/981201'
, 'puzzle/5.2'
, 'quicktree/1.1'
, 'rbvotree/1.00'
, 'repeatoire/1.0'
, 'repeats/1.1'
, 'saps/07-01-1995'
, 'IMP/r17125' # saxmerge
, 'scan_for_matches/97'
, 'scousin-tools/0.1'
, 'seq-gen/1.3.2'
, 'sig/1.0'
, 'smile/1.47'
, 'squizz/0.99b'
, 'tacg/4.1.0'
, 'taxoptimizer/1.1'
, 'tipdate/1.2'
, 'toppred/1.10'
, 'treealign/NA'
, 'trnascan/1.23'
, 'ViennaRNA/1.8.4'
, 'weighbor/1.2.1'
, 'wise2/2.2.0'
, 'xpound/NA'
, 'xxr/3.03'
]
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment