load_kegg_ko.py 2.38 KB
Newer Older
1
2
3
4
5
#!/usr/bin/env python
import argparse
import logging
import requests
import sys
6
import time
7

8
from bioapi import MetageneDBCatalogFunctionAPI
9

10
from metagenedb.common.utils.chunks import generate_chunks
11
from metagenedb.common.utils.parsers import KEGGLineParser
12

13
14
# Set up logging with the default configuration and use the root logger
# for every message emitted by this script.
logging.basicConfig()
logger = logging.getLogger()
15
16
17
18

# Default URL requested by ImportKEGGKO to retrieve the KEGG KO list
# (the response is consumed line by line).
KEGG_KO_LIST_API = "http://rest.kegg.jp/list/ko"


Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
19
20
class ImportKEGGKO(object):
    """Download the KEGG KO list and push it to a MetageneDB instance.

    The list is fetched from ``kegg_ko_list_api`` with ``requests``, every
    line is parsed with ``KEGGLineParser.ko_list`` and the parsed entries are
    sent chunk by chunk through ``METAGENEDB_FUNCTION_API.put``. Progress
    counters are kept on the instance.
    """

    # API client class instantiated in ``__init__`` with ``base_url=url``.
    METAGENEDB_FUNCTION_API = MetageneDBCatalogFunctionAPI

    def __init__(self, url, kegg_ko_list_api=KEGG_KO_LIST_API):
        """Store the KEGG endpoint, build the API client and reset counters.

        Args:
            url: base URL handed to ``METAGENEDB_FUNCTION_API``.
            kegg_ko_list_api: URL from which the KO list is downloaded.
        """
        self.kegg_ko_list_api = kegg_ko_list_api
        self.metagenedb_function_api = self.METAGENEDB_FUNCTION_API(base_url=url)
        # Initialised here so the attribute exists before any load is run.
        self.total_kegg_nb = 0
        self.processed_kegg = 0
        self.created_kegg = 0
        self.updated_kegg = 0
        # NOTE(review): never incremented in this class, so the final
        # "skipped" log line always reports 0 -- confirm whether the API
        # response carries a skipped count that should be added here.
        self.skipped_kegg = 0

    def load_all_kegg_ko(self, chunk_size=1000, timeout=60):
        """Fetch every KEGG KO line and PUT the parsed entries in chunks.

        Args:
            chunk_size: number of KO lines sent per ``put`` call.
            timeout: value passed to ``requests.get`` as ``timeout`` so that
                an unresponsive server cannot block the script forever;
                pass ``None`` to wait indefinitely.

        Raises:
            requests.HTTPError: when the KEGG endpoint answers with an error
                status (via ``raise_for_status``).
            requests.Timeout: when the KEGG endpoint does not answer within
                ``timeout``.
        """
        all_ko_response = requests.get(self.kegg_ko_list_api, timeout=timeout)
        all_ko_response.raise_for_status()
        all_ko = all_ko_response.text.splitlines()
        self.total_kegg_nb = len(all_ko)
        for chunk in generate_chunks(all_ko, chunk_size):
            ko_chunk = [KEGGLineParser.ko_list(line) for line in chunk]
            response = self.metagenedb_function_api.put(ko_chunk)
            self.created_kegg += response.get('created').get('count')
            self.updated_kegg += response.get('updated').get('count')
            self.processed_kegg += len(ko_chunk)
            logger.info("%s/%s KEGG KO processed so far...", self.processed_kegg, self.total_kegg_nb)
            # Pause between chunks before issuing the next PUT.
            time.sleep(1)
        logger.info("[DONE] %s/%s KEGG KO created.", self.created_kegg, self.total_kegg_nb)
        logger.info("[DONE] %s/%s KEGG KO updated.", self.updated_kegg, self.total_kegg_nb)
        logger.info("[DONE] %s/%s KEGG KO skipped.", self.skipped_kegg, self.total_kegg_nb)
46
47
48
49
50
51
52
53
54
55
56
57
58


def parse_arguments():
    """
    Define the command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: holds ``url`` (base URL of the instance) and
        ``verbose`` (bool).

    Exits:
        With status 1 when argparse stops on an error, and with status 0
        when argparse stops successfully (e.g. after printing ``--help``).
    """
    parser = argparse.ArgumentParser(description=f'Populate KEGG KO database from {KEGG_KO_LIST_API}.')
    parser.add_argument('--url', help='base URL of the instance.', default='http://localhost/')
    parser.add_argument('-v', '--verbose', action='store_true')
    try:
        return parser.parse_args()
    except SystemExit as exit_request:
        # argparse exits with code 0 after --help; only real failures
        # (non-zero code) are reported as exit status 1.
        sys.exit(1 if exit_request.code else 0)
Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
59
60
61
62


def run():
    """Script entry point: read the CLI options, then import all KEGG KO."""
    cli_options = parse_arguments()
    # INFO-level progress messages are only enabled with -v/--verbose.
    if cli_options.verbose:
        logger.setLevel(logging.INFO)
    ImportKEGGKO(cli_options.url).load_all_kegg_ko()
67
68
69
70


# Launch the import only when this file is executed as a script.
if __name__ == "__main__":
    run()