biodocs2mongo.py 9.86 KB
Newer Older
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
1
2
3
#! /local/gensoft2/adm/bin/python

from __future__ import print_function
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
4
5

import argparse
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
6
7
import ConfigParser
import os
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
8
9
import pymongo
import re
10
import ssl
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
11
import string
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
12
import sys
13
14


Eric  DEVEAUD's avatar
Eric DEVEAUD committed
15
from pprint import pprint
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
16

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
17
18
19
20
#### TODO !!!!!
# 
# add config entry for use module.
# if set => check program listing from biodocs versus program listing from modules
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
21
22
23



Eric  DEVEAUD's avatar
Eric DEVEAUD committed
24
#---- import homebrew modules
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
25

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
26
27
28
# Directory holding the in-house python modules; it is inserted at the front
# of sys.path so it is searched first by the BiodocParser import below.
PYMODULEDIR="/local/gensoft2/adm/share/gensoft/pymodules"
sys.path.insert(0, PYMODULEDIR)
import BiodocParser
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
29

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
30
#---- some ugly global variables
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
31

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
32
33
34
35
36
DAT='.'  # directory in which config.cfg is looked up
ERRFH = sys.stderr  # stream err() prints to
LOGFH = sys.stdout  # stream log() prints to
# Severity codes passed to err(): a non-zero value is also used as the
# process exit status.
FATAL  = 1
WARN   = 0
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
37

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
38
def err(exit_val, *msg):
    """Print a diagnostic on ERRFH and exit when it is fatal.

    exit_val -- severity code: a false value (WARN) prints a 'Warning' line
                and returns; any other value (FATAL) prints an 'Error' line
                and terminates the process with exit_val as exit status.
    msg      -- message parts; each one is converted with str() and the
                parts are joined with ' - '.

    Returns None for warnings; raises SystemExit for fatal errors.
    """
    # Pick the label by truthiness instead of indexing a two-item list, so
    # an exit status other than 0 or 1 cannot raise IndexError.
    head = 'Error' if exit_val else 'Warning'
    print("%s: %s" % (head, " - ".join(map(str, msg))), file=ERRFH)
    if exit_val:
        sys.exit(exit_val)
    return None

def log(*msg):
    """Print the message parts, space separated, on LOGFH when VERBOSE is set.

    Each part is converted with str(). Nothing is written when VERBOSE is
    false.
    """
    if not VERBOSE:
        return
    line = ' '.join(str(part) for part in msg)
    print(line, file=LOGFH)
    # push the line out immediately rather than leaving it in the buffer
    LOGFH.flush()


Eric  DEVEAUD's avatar
Eric DEVEAUD committed
51
52
def _id_generator(*arg):
    """Return the given string parts joined with ID_SEPARATOR."""
    separator = ID_SEPARATOR
    return separator.join(arg)
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
53

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
54
def check_missing_biodocs(biodocs_lst):
    """Return the paths of biodocs_lst that are existing regular files.

    A warning is emitted through err() for every path that is not a file;
    the order of the remaining paths is preserved.
    """
    present = []
    for path in biodocs_lst:
        if os.path.isfile(path):
            present.append(path)
        else:
            err(WARN, path, 'no such file')
    return present

Eric  DEVEAUD's avatar
Eric DEVEAUD committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def get_biodocs(packdir):
    """Return the existing BIODOCS.yaml files found under a package directory.

    packdir -- installation directory of a package; its last path component
               is taken as the package name.

    Only entries of packdir whose name starts with the package name are
    considered. Entries without a BIODOCS.yaml file are reported by
    check_missing_biodocs() and left out of the result.
    """
    # Normalise first: with a trailing separator ('inst/toppred/') basename()
    # would return '' and every directory entry would pass the filter.
    pack_name = os.path.basename(os.path.normpath(packdir))
    biodocs_lst = [
        os.path.join(packdir, entry, 'BIODOCS.yaml')
        for entry in os.listdir(packdir)
        if entry.startswith(pack_name)
    ]
    return check_missing_biodocs(biodocs_lst)

def citation_key_translate(refs_col):
    """Rename the keys of each reference from the yaml names to the mongo names.

    refs_col -- iterable of reference dicts; every dict must hold the keys
                'ID', 'idType' and 'title' (KeyError otherwise).

    The dicts are modified in place and refs_col itself is returned.
    """
    # (key in yaml, key expected in mongo)
    renames = (
        ('ID', 'ID'),
        ('idType', 'IDtype'),
        ('title', 'citation'),
    )
    for reference in refs_col:
        for yaml_key, mongo_key in renames:
            reference[mongo_key] = reference.pop(yaml_key)
    return refs_col


def info2package(info):
    """Build the 'package' document from a parsed biodoc description.

    info -- mapping read from a biodoc; the keys used are visible in the
            body below.

    The document id is made of 'pack' and the package name. 'categories' is
    the concatenation of the operations and the topics. The entries of
    info['references'] have their keys renamed in place by
    citation_key_translate().
    """
    name = info['name']
    return {
        '_id': _id_generator('pack', name),
        'type': 'package',
        'name': info['name'],
        'description': info['description'],
        'home': info['home'],
        'source': info['sources'],
        'authors': info['authors'],
        'categories': info['operations'] + info['topics'],
        'topics': info['topics'],
        'operations': info['operations'],
        'collections': [info['origin']],
        'history': info['history'],
        'library': info['library'],
        'private': info['private'],
        'references': citation_key_translate(info['references']),
    }
   

def info2pack_version(info):
    """Build the 'packageVersion' document from a parsed biodoc description.

    info -- mapping read from a biodoc; the keys used are 'name', 'version',
            'default', 'depends' and 'htmldocs'.

    The document id is made of 'pack', the package name and the version; the
    'package' field holds the id of the owning package document. In 'doc'
    only 'html' is filled, 'docs' and 'man' are empty lists.
    """
    name = info['name']
    package_id = _id_generator("pack", name)
    version_id = _id_generator("pack", name, info['version'])
    documentation = {
        'docs': [],
        'html': info['htmldocs'],
        'man': [],
    }
    return {
        '_id': version_id,
        'type': 'packageVersion',
        'package': package_id,
        'default': info['default'],
        'version': info['version'],
        'depends': info['depends'],
        'doc': documentation,
    }


def info2progs(info):
    """Build one 'program' document per entry of info['programs'].

    info -- mapping read from a biodoc; 'name' and 'version' identify the
            package version, each program entry provides 'name',
            'description' and 'manpages'.

    Returns the list of program documents, in the order of info['programs'].
    A false description is stored as an empty string.
    """
    name = info['name']
    version = info['version']
    return [
        {
            '_id': _id_generator("prog", name, version, prog['name']),
            'type': 'program',
            'name': prog['name'],
            'description': prog['description'] or '',
            'packageVersion': _id_generator("pack", name, version),
            'categories': [],
            'doc': {'html': [], 'man': [prog['manpages']]},
        }
        for prog in info['programs']
    ]


def info2mobyle(info):
    """Build the 'mobyle' interface documents declared by the programs.

    info -- mapping read from a biodoc; 'name' and 'version' identify the
            package version, each entry of 'programs' provides 'name' and
            optionally 'web', a list of strings of the form
            'interface_name [description]'.

    Returns a list with one document per valid web description. A string
    that does not match the expected form is reported as a warning and
    skipped.
    """
    # NOTE: the former filtering against mobyledefs.pack_info is no longer
    # needed with BIODOCS.yaml.
    mobyle = []
    pack_name = info['name']
    pack_version = info['version']
    web_entry = re.compile(r'(?P<name>[\w-]+)\s*\[(?P<desc>.*)\]')

    for prg in info['programs']:
        prog_name = prg['name']
        # nothing to document when 'web' is missing or empty
        for web_info in prg.get('web', False) or ():
            match = web_entry.match(web_info)
            if match is None:
                err(WARN, 'invalid web description', web_info)
                # err() returns on warnings: skip the entry instead of
                # calling .group() on None.
                continue
            i_name = match.group('name')
            mobyle.append({
                '_id': _id_generator("mobyle", pack_name, pack_version, i_name),
                'type': 'mobyle',
                # holds the packageVersion id; purge() selects on this value
                'package': _id_generator("pack", pack_name, pack_version),
                'programs': [_id_generator("prog", pack_name, pack_version, prog_name)],
                'name': i_name,
                'description': match.group('desc'),
                'url': "%s#forms::%s" % (MOBYLEURL, i_name),
            })
    return mobyle


def mongo_connect(host, port, db, col):
    """Open a client on the mongo server and return the requested collection.

    host, port -- address of the mongo server.
    db         -- name of the database.
    col        -- name of the collection inside db.

    A pymongo ConnectionFailure raised while creating the client is reported
    as a fatal error through err(), which exits the process.
    """
    log('connect to', host, 'on port', port)
    # NOTE(review): ssl.CERT_NONE disables certificate verification --
    # confirm this is intended for the target server.
    options = {'j': True, 'ssl': True, 'ssl_cert_reqs': ssl.CERT_NONE}
    try:
        client = pymongo.MongoClient(host, port, **options)
    except pymongo.errors.ConnectionFailure as msg:
        err(FATAL, "mongodb %s/%s" %(host, port), msg)
    return client[db][col]
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
190
191


192
193
194
195
196
197
198
199
200
def escapePackName(name):
    """Return name with a backslash inserted before every punctuation character.

    Punctuation is whatever string.punctuation contains; all other
    characters are copied unchanged.
    """
    return ''.join(
        '\\' + char if char in string.punctuation else char
        for char in name
    )


Eric  DEVEAUD's avatar
Eric DEVEAUD committed
201
def purge(col, pack_id, pack_version_id):
    '''
    Remove any previous documents related to a package version.
    Not the fastest but the safest.

    col             -- collection the documents are removed from.
    pack_id         -- id of the package document.
    pack_version_id -- id of the packageVersion document; program and mobyle
                       documents are selected through their reference to it.
    '''
    selectors = (
        # package info
        {'type': 'package', '_id': pack_id},
        # packageVersion info
        {'type': 'packageVersion', '_id': pack_version_id},
        # program info
        {'type': 'program', 'packageVersion': pack_version_id},
        # mobyle infos
        {'type': 'mobyle', 'package': pack_version_id},
    )
    for selector in selectors:
        col.remove(selector)

def insert(col, pack_name, pack_version, *args):
    """Insert the documents of one package version into the collection.

    col          -- collection the documents are inserted in.
    pack_name    -- package name, used for the progress and warning messages.
    pack_version -- package version, used for the same messages.
    args         -- documents to insert; each argument is either a single
                    document or a list of documents.

    A progress line ending with 'OK' or 'Failed' is printed on stdout.
    pymongo errors are reported as warnings, they do not stop the script.
    """
    entries = []
    for item in args:
        if isinstance(item, list):
            entries.extend(item)
        else:
            entries.append(item)

    print("\tinsert %s %s" %( pack_name, pack_version), end='')
    inserted = 0
    try:
        inserted = col.insert(entries)
    except pymongo.errors.DuplicateKeyError as exc:
        err(WARN, "unable to insert from", pack_name, pack_version , exc)
    except pymongo.errors.PyMongoError as exc:
        # pymongo.errors is a module and cannot be used in an except clause;
        # PyMongoError is the base class of the pymongo exceptions.
        err(WARN, exc)
    status = 'OK' if inserted else "Failed"
    print("::\t%s" %(status))
        
Eric  DEVEAUD's avatar
Eric DEVEAUD committed
232
233
234

if __name__ == '__main__':

    #---- default values, read from config.cfg in the DAT directory
    config_file = os.path.join(DAT, 'config.cfg')
    cfg = ConfigParser.SafeConfigParser()
    if not cfg.read(config_file):
        err(FATAL, 'No configuration file found')

    # ID_SEPARATOR, VERBOSE and MOBYLEURL are module globals read by
    # _id_generator(), log() and info2mobyle().
    ID_SEPARATOR = cfg.get('DEFAULT', 'ID_SEPARATOR')
    # cfg.get() returns a string, and 'False' or '0' are true strings: parse
    # the value as a boolean, keeping the former truthiness for other values.
    try:
        VERBOSE = cfg.getboolean('DEFAULT', 'VERBOSE')
    except ValueError:
        VERBOSE = bool(cfg.get('DEFAULT', 'VERBOSE'))
    HOST = cfg.get('MONGO', 'HOST')
    PORT = int(cfg.get('MONGO', 'PORT'))
    DB = cfg.get('MONGO', 'DB')
    COL = cfg.get('MONGO', 'COL')
    # NOTE: JOURNALING is read but not used below; mongo_connect() passes
    # j=True by itself.
    JOURNALING = cfg.get('MONGO', 'JOURNALING')
    MOBYLEURL = cfg.get('MOBYLE', 'MOBYLEURL')

    #---- get command line
    parser = argparse.ArgumentParser(
        description='Gensoft to bioweb mongo db conversion tool.',
        epilog="example: biodocs2mongo.py /local/gensoft2/inst/toppred")
    parser.add_argument('--host', action="store", type=str, default=HOST,
                        dest='db_host',
                        help='mongo server host.')
    parser.add_argument('--port', action='store', type=int, default=PORT,
                        dest='db_port',
                        help='mongo server port.')
    parser.add_argument('--test', action='store_true', default=False,
                        dest='test',
                        help='perform the job but does not insert on DB')
    parser.add_argument('--db', action='store', type=str, default=DB,
                        dest='db_name',
                        help='mongo DB to work with.')
    parser.add_argument('-v', action='store_true', default=VERBOSE,
                        dest='VERBOSE',
                        help='Turns verbosity on.')
    parser.add_argument('args', nargs='*',
                        help='inst dir of package to document')
    cmdline = parser.parse_args()

    #---- update config from cmdline options.
    HOST = cmdline.db_host
    PORT = cmdline.db_port
    # --db was parsed but never applied, so the configured DB was always used
    DB = cmdline.db_name
    VERBOSE = cmdline.VERBOSE

    #---- get mongo connection.
    col = mongo_connect(HOST, PORT, DB, COL)

    for packdir in cmdline.args:
        log(packdir)
        for biodoc in get_biodocs(packdir):
            info = BiodocParser.get_packversion(biodoc)
            pack_name = info['name']
            pack_id = _id_generator("pack", pack_name)
            pack_version = info['version']
            pack_version_id = _id_generator("pack", pack_name, pack_version)

            pack_doc = info2package(info)
            pack_version_doc = info2pack_version(info)
            program_docs = info2progs(info)
            mobyle_docs = info2mobyle(info)

            if cmdline.test:
                # test mode: only show what would be stored
                print(">>>>>>>>>>>>>> pack")
                pprint(pack_doc)
                print(">>>>>>>>>>>>>> packversion")
                pprint(pack_version_doc)
                print(">>>>>>>>>>>>>> programs")
                pprint(program_docs)
                print(">>>>>>>>>>>>>> mobyle")
                pprint(mobyle_docs)
                continue

            # replace whatever was stored before for this package version
            purge(col, pack_id, pack_version_id)
            insert(col, pack_name, pack_version, pack_doc, pack_version_doc, program_docs, mobyle_docs)