Skip to content
Snippets Groups Projects
Select Git revision
  • d2ffd8c9a04c2d1bab11180af60d1ac1120335bf
  • dev default
  • improve-source
  • improve-db-queries
  • master protected
5 results

load_kegg_ko.py

Blame
  • load_kegg_ko.py 2.68 KiB
    #!/usr/bin/env python
    import argparse
    import logging
    import requests
    import sys
    import time
    
    from bioapi import MetageneDBCatalogFunctionAPI
    
    from metagenedb.common.utils.chunks import generate_chunks
    from metagenedb.common.utils.parsers import KEGGLineParser
    
    # Install the default handler on the root logger so log records are emitted.
    logging.basicConfig()
    # getLogger() without a name returns the root logger; run() raises its level
    # to INFO when --verbose is passed.
    logger = logging.getLogger()
    
    # URL fetched to obtain the KEGG KO list; the response text is consumed one
    # entry per line. Used as the default source of ImportKEGGKO and quoted in
    # the command-line description.
    KEGG_KO_LIST_API = "http://rest.kegg.jp/list/ko"
    
    
    class ImportKEGGKO(object):
        """Download the KEGG KO list and push it to the MetageneDB function API.

        The list is fetched as plain text from ``kegg_ko_list_api``, every line is
        parsed with ``KEGGLineParser.ko_list``, tagged with the KEGG source and
        sent chunk by chunk through ``METAGENEDB_FUNCTION_API.put``.

        Progress is tracked on the instance: ``total_kegg_nb`` (lines received),
        ``processed_kegg``, ``created_kegg`` and ``updated_kegg``.

        NOTE(review): a "skipped" counter was sketched here previously but never
        wired up -- confirm whether the API reports skipped entries before adding it.
        """

        METAGENEDB_FUNCTION_API = MetageneDBCatalogFunctionAPI
        ORM_SOURCE_KEY = 'source'
        KEGG_SOURCE = 'kegg'
        # Seconds requests waits on the KEGG server (connect / between bytes read)
        # before raising, so a stalled connection cannot hang the import forever.
        KEGG_REQUEST_TIMEOUT = 30

        def __init__(self, url, jwt_token, kegg_ko_list_api=KEGG_KO_LIST_API):
            """
            Args:
                url: base URL handed to the function API client as ``base_url``.
                jwt_token: JWT token handed to the function API client.
                kegg_ko_list_api: URL the KEGG KO list is downloaded from.
            """
            self.kegg_ko_list_api = kegg_ko_list_api
            self.metagenedb_function_api = self.METAGENEDB_FUNCTION_API(base_url=url, jwt_token=jwt_token)
            # Defined up front so the attribute exists before load_all_kegg_ko() runs.
            self.total_kegg_nb = 0
            self.processed_kegg = 0
            self.created_kegg = 0
            self.updated_kegg = 0

        def load_all_kegg_ko(self, chunk_size=1000):
            """Fetch the whole KEGG KO list and send it to the API in chunks.

            Args:
                chunk_size: number of KO lines sent per ``put`` call.

            Raises:
                requests.exceptions.HTTPError: the KEGG server answered with an
                    error status.
                requests.exceptions.Timeout: the KEGG server did not respond
                    within ``KEGG_REQUEST_TIMEOUT`` seconds.
            """
            all_ko_response = requests.get(self.kegg_ko_list_api, timeout=self.KEGG_REQUEST_TIMEOUT)
            all_ko_response.raise_for_status()
            all_ko = all_ko_response.text.splitlines()
            self.total_kegg_nb = len(all_ko)
            for chunk in generate_chunks(all_ko, chunk_size):
                ko_chunk = [KEGGLineParser.ko_list(line) for line in chunk]
                # Tag every parsed entry with its source before sending it.
                for ko_entry in ko_chunk:
                    ko_entry.update({self.ORM_SOURCE_KEY: self.KEGG_SOURCE})
                response = self.metagenedb_function_api.put(ko_chunk)
                self.created_kegg += response.get('created').get('count')
                self.updated_kegg += response.get('updated').get('count')
                self.processed_kegg += len(ko_chunk)
                logger.info("%s/%s KEGG KO processed so far...", self.processed_kegg, self.total_kegg_nb)
                # Pause between chunks -- presumably to avoid flooding the API; confirm.
                time.sleep(1)
            logger.info("[DONE] %s/%s KEGG KO created.", self.created_kegg, self.total_kegg_nb)
            logger.info("[DONE] %s/%s KEGG KO updated.", self.updated_kegg, self.total_kegg_nb)
    
    
    def parse_arguments():
        """
        Build the command-line parser and parse the process arguments.

        Returns:
            argparse.Namespace with the attributes ``url``, ``jwt_token`` and
            ``verbose``.

        Exits the process with status 1 when argparse rejects the arguments, and
        with status 0 when argparse exits successfully (e.g. after ``-h/--help``).
        """
        parser = argparse.ArgumentParser(description=f'Populate KEGG KO database from {KEGG_KO_LIST_API}.')
        parser.add_argument('--url', help='base URL of the instance.', default='http://localhost/')
        parser.add_argument('-t', '--jwt_token', help='your JWT token obtain from web app', required=True)
        parser.add_argument('-v', '--verbose', action='store_true')
        try:
            return parser.parse_args()
        except SystemExit as exit_request:
            # argparse also raises SystemExit (code 0) after printing help; keep
            # that success status and only map real failures to 1.
            sys.exit(0 if exit_request.code in (0, None) else 1)
    
    
    def run():
        """
        Script entry point: parse the command line, raise the log level to INFO
        when ``--verbose`` is set, then import every KEGG KO entry.
        """
        cli_args = parse_arguments()
        if cli_args.verbose:
            logger.setLevel(logging.INFO)
        importer = ImportKEGGKO(cli_args.url, cli_args.jwt_token)
        importer.load_all_kegg_ko()
    
    
    # Only start the import when executed as a script, not when imported.
    if __name__ == "__main__":
        run()