Commit 9201ea5e authored by Kenzo-Hugo Hillion
Browse files

Add script to build hierarchy from backend

parent 266e125e
Pipeline #17617 passed with stages
in 2 minutes and 46 seconds
import logging
from django.core.management.base import BaseCommand
from metagenedb.apps.catalog.models import Taxonomy
# Module-wide logging setup: every record is rendered with a timestamp,
# its level, the emitting logger's name and the message.
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)

# Relation lookup path "parent__parent__...__parent": the initial "parent"
# followed by 40 further "__parent" hops, i.e. 41 segments joined by "__".
SELECT_RELATED_PARENT = "__".join(["parent"] * 41)
class HierarchyBuilder:
    """Walk a queryset and access ``parental_hierarchy`` on every item.

    The instance keeps running counters:

    * ``total_tax``: number of items reported by ``queryset.count()``
    * ``processed_tax``: items visited so far
    * ``hierarchy_built``: items whose ``parental_hierarchy`` access succeeded
    * ``hierarchy_failed``: items whose access raised an exception
    """

    def __init__(self, queryset):
        """Store the queryset and initialise the counters.

        Args:
            queryset: object exposing ``count()`` and
                ``iterator(chunk_size=...)``.
        """
        self.queryset = queryset
        self.total_tax = queryset.count()
        self.processed_tax = 0
        self.hierarchy_built = 0
        self.hierarchy_failed = 0

    def build_all(self, chunk_size=8000):
        """Access ``parental_hierarchy`` on every item of the queryset.

        Failures on individual items are counted and skipped so that one bad
        entry does not abort the whole run. Progress is logged every 10000
        processed items, and a summary is logged at the end.

        Args:
            chunk_size: value forwarded to ``queryset.iterator``.
        """
        logger.info("Building all hierarchy for all %s taxonomy items...", self.total_tax)
        for taxonomy in self.queryset.iterator(chunk_size=chunk_size):
            try:
                # NOTE(review): presumably reading the attribute is what
                # computes/stores the hierarchy on the model -- confirm
                # against Taxonomy.parental_hierarchy.
                _ = taxonomy.parental_hierarchy
            except Exception:
                self.hierarchy_failed += 1
                # Keep the run going, but leave a trace of the cause for
                # anyone running at DEBUG verbosity.
                logger.debug("Hierarchy build failed for %r", taxonomy, exc_info=True)
            else:
                self.hierarchy_built += 1
            self.processed_tax += 1
            if self.processed_tax % 10000 == 0:
                # Progress report only: the loop must keep going, otherwise
                # the remaining items would never be processed.
                logger.info("%s/%s Taxonomy processed so far...", self.processed_tax, self.total_tax)
        logger.info("[DONE] %s/%s Hierarchy built.", self.hierarchy_built, self.total_tax)
        logger.info("[DONE] %s/%s Hierarchy build skipped.", self.hierarchy_failed, self.total_tax)
class Command(BaseCommand):
    """Management command that runs ``HierarchyBuilder`` over all Taxonomy rows."""

    help = 'Build hierarchy for taxonomy entries.'

    def set_logger_level(self, verbosity):
        """Raise the module logger's verbosity according to ``verbosity``.

        Above 2 selects DEBUG, exactly above 1 selects INFO; any lower value
        leaves the logger level untouched.
        """
        if verbosity > 2:
            level = logging.DEBUG
        elif verbosity > 1:
            level = logging.INFO
        else:
            return
        logger.setLevel(level)

    def get_queryset(self):
        """Return the Taxonomy queryset with the parent chain and rank relations joined."""
        related_fields = (
            SELECT_RELATED_PARENT,
            "superkingdom",
            "kingdom",
            "phylum",
            "class_rank",
            "order",
            "family",
            "genus",
            "species",
        )
        return Taxonomy.objects.select_related(*related_fields).all()

    def handle(self, *args, **options):
        """Entry point: configure logging, then build the hierarchy for every entry."""
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        builder = HierarchyBuilder(self.get_queryset())
        builder.build_all()
......@@ -54,10 +54,10 @@ spec:
name: postgresql
resources:
requests:
- memory: "256Mi"
+ memory: "512Mi"
cpu: "100m"
limits:
- memory: "512Mi"
+ memory: "1024Mi"
cpu: "500m"
volumeMounts:
- name: postgresql
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment