Commit 59aeb54a authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion
Browse files

add script to populate DB

parent 45bc993a
import argparse
import logging
import os
import sys
from itertools import islice
import django
from django.core.exceptions import ValidationError
# Before importing the models, django.setup() must be called to load the apps
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()
from metagenedb.apps.catalog.models import Gene
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
def create_gene(raw_line):
    """
    Build an unsaved Gene from one tab-separated annotation line.

    The line is stripped of trailing whitespace and split on tabs; the
    fields at (0-based) positions 1, 2, 5 and 6 become the gene id, gene
    length, taxonomic phylum and taxonomic genus respectively.

    Raises IndexError when the line has fewer than seven fields.
    """
    columns = raw_line.rstrip().split('\t')
    return Gene(
        gene_id=columns[1],
        gene_length=columns[2],
        taxonomic_genus=columns[6],
        taxonomic_phylum=columns[5],
    )
def insert_gene(gene):
    """
    Validate a gene and write it to the database.

    Whatever `gene.full_clean()` raises is propagated to the caller, in
    which case `gene.save()` is never reached.
    """
    # Validation comes first so that an invalid object is never saved.
    gene.full_clean()
    gene.save()
def insert_gene_list(chunk_genes):
    """
    Insert every annotation line of a chunk into the database.

    Each raw line is turned into a gene with `create_gene` and stored with
    `insert_gene`. A line that cannot be processed is logged as a warning
    and skipped, so one bad line never aborts the whole import:

    - lines with too few tab-separated fields (e.g. blank or truncated
      lines), which make `create_gene` raise IndexError;
    - genes rejected by model validation (ValidationError).

    :param chunk_genes: iterable of raw annotation lines
    """
    for raw_line in chunk_genes:
        # Build the object on its own so that the validation warning below
        # can only ever refer to the gene made from the current line.
        try:
            gene = create_gene(raw_line)
        except IndexError:
            _LOGGER.warning(f"Malformed annotation line (missing columns): {raw_line!r}. Insertion skipped.")
            continue
        try:
            insert_gene(gene)
        except ValidationError as e:
            _LOGGER.warning(f"{e.__dict__} for gene_id: {gene.gene_id}. Insertion skipped.")
def load_annotation_file_to_db_in_chunks(annotation_file, chunk_size=100000):
    """
    Read an annotation file chunk by chunk and insert its lines.

    At most `chunk_size` lines are held in memory at a time. Each chunk is
    handed to `insert_gene_list`, and the running number of lines read is
    logged after every chunk and once more at the end.

    :param annotation_file: path of the annotation file to read
    :param chunk_size: maximum number of lines read per chunk
    """
    processed = 0
    with open(annotation_file, 'r') as handle:
        # iter(callable, sentinel) keeps pulling chunks until an empty list
        # comes back, i.e. until the file is exhausted.
        for lines in iter(lambda: list(islice(handle, chunk_size)), []):
            processed += len(lines)
            insert_gene_list(lines)
            _LOGGER.info(f"{processed} genes processed so far...")
    _LOGGER.info(f"[DONE] {processed} genes processed.")
def parse_arguments(argv=None):
    """
    Defines parser and parses the command-line arguments.

    :param argv: optional list of argument strings to parse; when None
        (the default) argparse reads them from sys.argv[1:].
    :return: namespace with `annotation` (path of the IGC annotation file)
        and `delete_all` (bool, True when --delete_all was given).
    :raises SystemExit: with status 0 after printing --help, and with
        status 1 when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(description='Populate database from a given IGC annotation file.')
    # Common arguments for analysis and annotations
    parser.add_argument('annotation', help='IGC annotation file')
    parser.add_argument('--delete_all', action='store_true', help='Empty database before insertion.')
    try:
        return parser.parse_args(argv)
    except SystemExit as exit_request:
        # argparse exits with 0 after --help and with a non-zero status on a
        # usage error. Errors keep the script's exit status of 1, but a help
        # request must not be reported as a failure.
        sys.exit(1 if exit_request.code else 0)
def run():
    """
    Entry point of the script.

    Parses the command line, deletes all existing Gene rows when
    --delete_all was given, then loads the annotation file into the
    database.
    """
    options = parse_arguments()
    if options.delete_all:
        # Start from an empty table when the user asked for it.
        Gene.objects.all().delete()
    load_annotation_file_to_db_in_chunks(options.annotation)
# Only populate the database when this file is executed as a script.
if __name__ == "__main__":
    run()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment