load_kegg_ko.py 2.71 KB
Newer Older
1
2
3
4
5
6
#!/usr/bin/env python
import argparse
import logging
import os
import requests
import sys
Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
7
from requests.exceptions import HTTPError
8
9

import django
10
from bioapi import MetageneDBCatalogFunctionAPI
11
12
from django.core.exceptions import ValidationError

13
from metagenedb.common.utils.parsers import KEGGLineParser
14

15
16
17
18
# Before importing any model, django.setup() must be called to load the apps.
# The settings module is only set if DJANGO_SETTINGS_MODULE is not already
# defined in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()

Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
19
from metagenedb.apps.catalog.models import KeggOrthology  # noqa
20
21
22
23
24
25
26
27
28
29
30
31

# Log at INFO level so that the progress messages emitted during the import are shown.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)

# URL requested to retrieve the list of KEGG KO entries; used as the default
# source by ImportKEGGKO and shown in the command-line description.
KEGG_KO_LIST_API = "http://rest.kegg.jp/list/ko"


def parse_arguments():
    """
    Build the command-line parser and parse the arguments of the script.

    Returns:
        argparse.Namespace: the parsed arguments; ``url`` holds the base URL
        of the instance (defaults to ``http://localhost/``).

    Raises:
        SystemExit: with status 1 whenever argparse itself exits, i.e. on
        invalid arguments but also on ``--help``.
    """
    parser = argparse.ArgumentParser(description=f'Populate KEGG KO database from {KEGG_KO_LIST_API}.')
    parser.add_argument('--url', help='base URL of the instance.', default='http://localhost/')
    try:
        return parser.parse_args()
    except SystemExit:
        # argparse terminates by raising SystemExit (status 2 on a usage error,
        # 0 after printing help); every such exit is normalised to status 1.
        sys.exit(1)


Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
39
40
class ImportKEGGKO(object):
    """
    Import the KEGG KO list into a MetageneDB instance through its function API.

    The list is downloaded from ``kegg_ko_list_api``, each line is parsed with
    ``KEGGLineParser.ko_list`` and the resulting entry is created or updated
    on the instance reachable at ``url``.

    Attributes set by the instance:
        kegg_ko_list_api: URL the KO list is downloaded from.
        metagenedb_function_api: API client built from ``METAGENEDB_FUNCTION_API``.
        total_kegg_nb: number of lines in the downloaded list (0 until loaded).
        inserted_kegg: number of entries successfully created or updated.
        skipped_kegg: number of entries skipped after a ``ValidationError``.
    """
    # API client class; kept as a class attribute so it can be overridden.
    METAGENEDB_FUNCTION_API = MetageneDBCatalogFunctionAPI

    def __init__(self, url, kegg_ko_list_api=KEGG_KO_LIST_API):
        """
        Args:
            url: base URL of the MetageneDB instance, passed to the API client.
            kegg_ko_list_api: URL returning the KEGG KO list, one entry per line.
        """
        self.kegg_ko_list_api = kegg_ko_list_api
        self.metagenedb_function_api = self.METAGENEDB_FUNCTION_API(base_url=url)
        # Declared here so the attribute exists before load_all_kegg_ko() runs.
        self.total_kegg_nb = 0
        self.inserted_kegg = 0
        self.skipped_kegg = 0

    def _upsert_kegg_ko(self, kegg_ko):
        """
        Update the entry if it already exists on the instance, create it otherwise.

        Args:
            kegg_ko: parsed KO entry; must expose ``get('function_id')``
                (presumably a dict produced by ``KEGGLineParser.ko_list``).

        Raises:
            HTTPError: if the creation (POST) or the update (PUT) request fails.
        """
        function_id = kegg_ko.get('function_id')
        try:
            # Try to get the object to check whether it exists.
            self.metagenedb_function_api.get(function_id)
        except HTTPError:
            self.metagenedb_function_api.post(kegg_ko)
        else:
            # The PUT is kept outside the try block: a failed update of an
            # existing entry must not be mistaken for "entry does not exist"
            # and followed by a POST of the same entry.
            self.metagenedb_function_api.put(function_id, kegg_ko)

    def load_all_kegg_ko(self):
        """
        Download the KEGG KO list and upsert every entry, logging the progress.

        Entries raising a ``ValidationError`` are counted as skipped and logged
        as warnings; the import continues with the next entry.

        Raises:
            HTTPError: if the KO list cannot be downloaded (non-success status),
                or if an upsert request fails.
        """
        response = requests.get(self.kegg_ko_list_api)
        response.raise_for_status()
        ko_lines = response.text.splitlines()
        self.total_kegg_nb = len(ko_lines)
        for line in ko_lines:
            kegg_ko = KEGGLineParser.ko_list(line)
            try:
                self._upsert_kegg_ko(kegg_ko)
            except ValidationError as e:
                self.skipped_kegg += 1
                _LOGGER.warning(f"{e.__dict__} for function_id: {kegg_ko.get('function_id')}. Insertion skipped.")
            else:
                self.inserted_kegg += 1
                # Report progress only right after a successful insertion, so
                # the same count is not logged again for each skipped entry.
                if self.inserted_kegg % 100 == 0:
                    _LOGGER.info(f"{self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted so far...")
        _LOGGER.info(f"[DONE] {self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted.")
        _LOGGER.info(f"[DONE] {self.skipped_kegg}/{self.total_kegg_nb} KEGG KO skipped.")


def run():
    """
    Script entry point: parse the command line, then import every KEGG KO
    into the instance designated by ``--url``.
    """
    cli_args = parse_arguments()
    ImportKEGGKO(cli_args.url).load_all_kegg_ko()
77
78
79
80


# Start the import only when the file is executed as a script, not on import.
if __name__ == "__main__":
    run()