# Reconstructed from a collapsed multi-file diff. Target file:
#   backend/metagenedb/apps/catalog/management/commands/build_hierarchy.py
# The same change set also:
#   - adds two empty package markers:
#       backend/metagenedb/apps/catalog/management/__init__.py
#       backend/metagenedb/apps/catalog/management/commands/__init__.py
#   - edits ci/kubernetes/postgresql.yaml, resources of the entry named
#     "postgresql":
#       requests.memory "256Mi" -> "512Mi"
#       limits.memory   "512Mi" -> "1024Mi"
#     (requests.cpu "100m" and limits.cpu "500m" are unchanged context lines)
"""Management command that builds the parental hierarchy of Taxonomy entries."""
import logging

from django.core.management.base import BaseCommand

from metagenedb.apps.catalog.models import Taxonomy

logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)

# select_related() lookup that follows the ``parent`` relation 41 levels deep
# ("parent" followed by 40 "__parent" hops), so that ancestors are fetched with
# the entry itself.
# NOTE(review): presumably ``parent`` is a self-referencing FK on Taxonomy and
# 41 levels is deeper than any real lineage -- confirm against the model.
SELECT_RELATED_PARENT = "parent{}".format("__parent" * 40)


class HierarchyBuilder:
    """Iterate over a Taxonomy queryset and build the hierarchy of each entry.

    The instance keeps running counters so the outcome can be reported:

    * ``total_tax``: number of entries in the queryset (one COUNT at init).
    * ``processed_tax``: entries visited so far.
    * ``hierarchy_built``: entries whose hierarchy access succeeded.
    * ``hierarchy_failed``: entries whose hierarchy access raised.
    """

    def __init__(self, queryset):
        """Store *queryset* and reset the counters.

        Args:
            queryset: object exposing ``count()`` and ``iterator(chunk_size=...)``
                (a Django ``QuerySet`` of Taxonomy in the command below).
        """
        self.queryset = queryset
        self.total_tax = queryset.count()
        self.processed_tax = 0
        self.hierarchy_built = 0
        self.hierarchy_failed = 0

    def build_all(self, chunk_size=8000, progress_every=10000):
        """Build the hierarchy of every entry of the queryset.

        Failures are counted and skipped (best effort); they never abort the
        run. Unlike the previous revision, the loop no longer stops at the
        first progress message: a leftover ``break`` ended the run early.

        Args:
            chunk_size: forwarded to ``queryset.iterator()``.
            progress_every: log a progress line each time this many entries
                have been processed; a falsy value disables progress logging.
        """
        logger.info("Building all hierarchy for all %s taxonomy items...", self.total_tax)
        for taxonomy in self.queryset.iterator(chunk_size=chunk_size):
            try:
                # Only the attribute access matters here; the value is discarded.
                # NOTE(review): presumably ``parental_hierarchy`` computes and
                # persists the hierarchy as a side effect -- confirm on the model.
                _ = taxonomy.parental_hierarchy
            except Exception:
                self.hierarchy_failed += 1
                # Keep the best-effort behaviour but leave a trace of the cause,
                # visible when the command runs at DEBUG level.
                logger.debug(
                    "Could not build hierarchy for taxonomy %s",
                    getattr(taxonomy, "pk", None), exc_info=True,
                )
            else:
                self.hierarchy_built += 1
            self.processed_tax += 1
            if progress_every and self.processed_tax % progress_every == 0:
                logger.info("%s/%s Taxonomy processed so far...", self.processed_tax, self.total_tax)
        logger.info("[DONE] %s/%s Hierarchy built.", self.hierarchy_built, self.total_tax)
        logger.info("[DONE] %s/%s Hierarchy build skipped.", self.hierarchy_failed, self.total_tax)


class Command(BaseCommand):
    """``manage.py build_hierarchy``: build the hierarchy of all Taxonomy entries."""

    help = 'Build hierarchy for taxonomy entries.'

    def set_logger_level(self, verbosity):
        """Map the command verbosity onto this module's logger level.

        Verbosity above 2 selects DEBUG, above 1 selects INFO; any lower value
        leaves the logger level untouched.
        """
        if verbosity > 2:
            logger.setLevel(logging.DEBUG)
        elif verbosity > 1:
            logger.setLevel(logging.INFO)

    def get_queryset(self):
        """Return all Taxonomy entries with parents and rank relations joined."""
        return Taxonomy.objects.select_related(
            SELECT_RELATED_PARENT, "superkingdom", "kingdom", "phylum", "class_rank",
            "order", "family", "genus", "species"
        ).all()

    def handle(self, *args, **options):
        """Entry point: configure logging, then build every hierarchy."""
        self.set_logger_level(int(options['verbosity']))
        hierarchy_builder = HierarchyBuilder(self.get_queryset())
        hierarchy_builder.build_all()