Commit 5cf966b4 authored by Kenzo-Hugo Hillion
Browse files

start using put from list for kegg script

parent 6eb0b0db
Pipeline #14452 passed with stages
in 2 minutes and 7 seconds
#!/usr/bin/env python
import argparse
import logging
import os
import requests
import sys
import time
from requests.exceptions import HTTPError
import django
from bioapi import MetageneDBCatalogFunctionAPI
from django.core.exceptions import ValidationError
from metagenedb.common.utils.chunks import generate_chunks
from metagenedb.common.utils.parsers import KEGGLineParser
# Django must be configured before any model can be imported: point it at the
# project settings (unless DJANGO_SETTINGS_MODULE is already set) and call
# django.setup() to load the apps.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()
from metagenedb.apps.catalog.models import KeggOrthology # noqa
# Logger named after this module.
_LOGGER = logging.getLogger(__name__)
# Install the default logging configuration, then keep a handle on the root
# logger (getLogger() without a name returns the root logger).
logging.basicConfig()
logger = logging.getLogger()
# KEGG REST endpoint from which the KO list is downloaded.
KEGG_KO_LIST_API = "http://rest.kegg.jp/list/ko"
def parse_arguments():
    """
    Build the command-line parser and parse the process arguments.

    The only option is ``--url``, the base URL of the instance, which
    defaults to ``http://localhost/``. Whenever argparse raises
    ``SystemExit``, the process exits with status 1 instead.
    """
    summary = f'Populate KEGG KO database from {KEGG_KO_LIST_API}.'
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument('--url', default='http://localhost/', help='base URL of the instance.')
    try:
        parsed = cli.parse_args()
    except SystemExit:
        # Normalise every argparse exit to status 1.
        sys.exit(1)
    return parsed
class ImportKEGGKO(object):
METAGENEDB_FUNCTION_API = MetageneDBCatalogFunctionAPI
def __init__(self, url, kegg_ko_list_api=KEGG_KO_LIST_API):
    """
    Store the KEGG endpoint, build the function API client and reset counters.

    :param url: base URL handed to ``METAGENEDB_FUNCTION_API`` as ``base_url``.
    :param kegg_ko_list_api: URL the KO list is fetched from.
    """
    self.kegg_ko_list_api = kegg_ko_list_api
    # The client class is a class attribute so it can be swapped on the class.
    api_class = self.METAGENEDB_FUNCTION_API
    self.metagenedb_function_api = api_class(base_url=url)
    # Every progress counter starts at zero.
    self.inserted_kegg = self.processed_kegg = self.created_kegg = 0
    self.updated_kegg = self.skipped_kegg = 0
def _upsert_kegg_ko(self, kegg_ko):
......@@ -51,26 +35,41 @@ class ImportKEGGKO(object):
except HTTPError:
self.metagenedb_function_api.post(kegg_ko)
def load_all_kegg_ko(self):
    """
    Download the KEGG KO list and upsert its entries one by one.

    The list is fetched from ``self.kegg_ko_list_api``. Each line is parsed
    with ``KEGGLineParser.ko_list`` and handed to ``self._upsert_kegg_ko``.
    Entries that raise ``ValidationError`` are logged and counted in
    ``self.skipped_kegg``; the others are counted in ``self.inserted_kegg``.
    ``self.total_kegg_nb`` is set to the number of lines in the response.

    An error raised by ``raise_for_status()`` on the list request propagates
    to the caller.
    """
    all_ko = requests.get(self.kegg_ko_list_api)
    all_ko.raise_for_status()
    # Split the payload once and reuse it for both the total and the loop.
    ko_lines = all_ko.text.splitlines()
    self.total_kegg_nb = len(ko_lines)
    for line in ko_lines:
        kegg_ko = KEGGLineParser.ko_list(line)
        try:
            self._upsert_kegg_ko(kegg_ko)
        except ValidationError as e:
            self.skipped_kegg += 1
            _LOGGER.warning(f"{e.__dict__} for function_id: {kegg_ko.get('function_id')}. Insertion skipped.")
        else:
            self.inserted_kegg += 1
            # Report progress only right after a successful insertion, so a
            # skipped record cannot repeat the same "so far" message while
            # the counter sits on a multiple of 100.
            if self.inserted_kegg % 100 == 0:
                _LOGGER.info(f"{self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted so far...")
    _LOGGER.info(f"[DONE] {self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted.")
    _LOGGER.info(f"[DONE] {self.skipped_kegg}/{self.total_kegg_nb} KEGG KO skipped.")
def load_all_kegg_ko(self, chunk_size=1000):
    """
    Download the KEGG KO list and push it to the function API in chunks.

    The list is fetched from ``self.kegg_ko_list_api`` and split into lines.
    Chunks produced by ``generate_chunks(all_ko, chunk_size)`` are parsed
    with ``KEGGLineParser.ko_list`` and sent in a single ``put`` call each.
    The ``count`` values found under the ``created`` and ``updated`` entries
    of each response are added to ``self.created_kegg`` and
    ``self.updated_kegg``.

    :param chunk_size: chunk size passed to ``generate_chunks``.

    An error raised by ``raise_for_status()`` on the list request propagates
    to the caller.
    """
    all_ko_response = requests.get(self.kegg_ko_list_api)
    all_ko_response.raise_for_status()
    all_ko = all_ko_response.text.splitlines()
    self.total_kegg_nb = len(all_ko)
    for chunk in generate_chunks(all_ko, chunk_size):
        ko_chunk = [KEGGLineParser.ko_list(line) for line in chunk]
        response = self.metagenedb_function_api.put(ko_chunk)
        # NOTE(review): assumes the response always carries 'created' and
        # 'updated' entries, each with a 'count' - confirm against the API.
        self.created_kegg += response.get('created').get('count')
        self.updated_kegg += response.get('updated').get('count')
        self.processed_kegg += len(ko_chunk)
        # Plain format strings: the values are passed as lazy %-style
        # logging arguments, so no f-string prefix is needed.
        logger.info("%s/%s KEGG KO processed so far...", self.processed_kegg, self.total_kegg_nb)
        # Pause for one second after each chunk.
        time.sleep(1)
    logger.info("[DONE] %s/%s KEGG KO created.", self.created_kegg, self.total_kegg_nb)
    logger.info("[DONE] %s/%s KEGG KO updated.", self.updated_kegg, self.total_kegg_nb)
    # Nothing in this method increments skipped_kegg; it is reported as stored.
    logger.info("[DONE] %s/%s KEGG KO skipped.", self.skipped_kegg, self.total_kegg_nb)
def parse_arguments():
    """
    Build the command-line parser and parse the process arguments.

    Options are ``--url`` (base URL of the instance, defaulting to
    ``http://localhost/``) and the ``-v`` / ``--verbose`` flag. Whenever
    argparse raises ``SystemExit``, the process exits with status 1 instead.
    """
    cli = argparse.ArgumentParser(
        description=f'Populate KEGG KO database from {KEGG_KO_LIST_API}.',
    )
    cli.add_argument('--url', default='http://localhost/', help='base URL of the instance.')
    cli.add_argument('-v', '--verbose', action='store_true')
    try:
        options = cli.parse_args()
    except SystemExit:
        # Normalise every argparse exit to status 1.
        sys.exit(1)
    else:
        return options
def run():
args = parse_arguments()
if args.verbose:
logger.setLevel(logging.INFO)
import_kegg_ko = ImportKEGGKO(args.url)
import_kegg_ko.load_all_kegg_ko()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment