diff --git a/backend/metagenedb/apps/catalog/factory/function.py b/backend/metagenedb/apps/catalog/factory/function.py index a8b077ebc828c9b5bd0894b8fa3b3bff182cdf07..4d0a275f9576bb2e094fba0e33772503b9516038 100644 --- a/backend/metagenedb/apps/catalog/factory/function.py +++ b/backend/metagenedb/apps/catalog/factory/function.py @@ -1,36 +1,33 @@ -from factory import DjangoModelFactory, fuzzy -from faker import Factory +from factory import DjangoModelFactory, Faker, fuzzy from metagenedb.apps.catalog import models -from .fuzzy_base import FuzzyLowerText - -faker = Factory.create() SELECTED_SOURCE = [i[0] for i in models.Function.SOURCE_CHOICES] EGGNOG_VERSIONS = [i[0] for i in models.EggNOG.VERSION_CHOICES] -class BaseFunctionFactory(DjangoModelFactory): - function_id = FuzzyLowerText(prefix='function-', length=15) - - -class FunctionFactory(BaseFunctionFactory): +class FunctionFactory(DjangoModelFactory): class Meta: model = models.Function + function_id = Faker('bothify', text='function-####') source = fuzzy.FuzzyChoice(SELECTED_SOURCE) - function_id = FuzzyLowerText(prefix='function-', length=15) -class EggNOGFactory(BaseFunctionFactory): +class EggNOGFactory(DjangoModelFactory): + function_id = Faker('bothify', text='COG####') + name = Faker('bothify', text='COG-????????') + class Meta: model = models.EggNOG version = fuzzy.FuzzyChoice(EGGNOG_VERSIONS) -class KeggOrthologyFactory(BaseFunctionFactory): +class KeggOrthologyFactory(DjangoModelFactory): + function_id = Faker('bothify', text='K0####') + class Meta: model = models.KeggOrthology diff --git a/backend/metagenedb/apps/catalog/factory/gene.py b/backend/metagenedb/apps/catalog/factory/gene.py index 304d3edc8a12a90ee906b804da47c1933cc2151b..3637655ab605e2c8b6e0a3d59d61379869bba9ea 100644 --- a/backend/metagenedb/apps/catalog/factory/gene.py +++ b/backend/metagenedb/apps/catalog/factory/gene.py @@ -1,16 +1,12 @@ from factory import ( - DjangoModelFactory, RelatedFactory, SubFactory, fuzzy + DjangoModelFactory, Faker, RelatedFactory, SubFactory, fuzzy ) -from faker import Factory from metagenedb.apps.catalog import models -from .fuzzy_base import FuzzyLowerText -from .function import FunctionFactory +from .function import FunctionFactory, KeggOrthologyFactory, EggNOGFactory from .taxonomy import TaxonomyFactory -faker = Factory.create() - GENE_SOURCES = [i[0] for i in models.Gene.SOURCE_CHOICES] @@ -18,9 +14,9 @@ class GeneFactory(DjangoModelFactory): class Meta: model = models.Gene - gene_id = FuzzyLowerText(prefix='gene-', length=15) - name = fuzzy.FuzzyText(prefix='name-', length=15) - length = fuzzy.FuzzyInteger(200, 10000) + gene_id = Faker('bothify', text='gene-?#?#??-#??#?#') + name = Faker('bothify', text='Gene_name-##-????') + length = Faker('pyint', min_value=200, max_value=4200) source = fuzzy.FuzzyChoice(GENE_SOURCES) @@ -36,9 +32,17 @@ class GeneFunctionFactory(DjangoModelFactory): function = SubFactory(FunctionFactory) +class GeneKeggFactory(GeneFunctionFactory): + function = SubFactory(KeggOrthologyFactory) + + +class GeneEggNOGFactory(GeneFunctionFactory): + function = SubFactory(EggNOGFactory) + + class GeneWithKeggFactory(GeneFactory): - kegg = RelatedFactory(GeneFunctionFactory, 'gene', function__source='kegg') + kegg = RelatedFactory(GeneKeggFactory, 'gene') class GeneWithEggNOGFactory(GeneFactory): - eggnog = RelatedFactory(GeneFunctionFactory, 'gene', function__source='eggnog') + eggnog = RelatedFactory(GeneEggNOGFactory, 'gene') diff --git a/backend/metagenedb/apps/catalog/factory/taxonomy.py b/backend/metagenedb/apps/catalog/factory/taxonomy.py index 1cca70d4f2735b86cfdf6f8f6dcb8afaeffb021f..0f770047d2c1fd6524b172d5a29b145fee9c8ed3 100644 --- a/backend/metagenedb/apps/catalog/factory/taxonomy.py +++ b/backend/metagenedb/apps/catalog/factory/taxonomy.py @@ -30,6 +30,7 @@ class DbGenerator: """ Tree need to be an OrderedDict from higher to lower level """ + self.last_tax = None for rank, desc in tree.items(): if desc['tax_id'] not in self.created_ids: self.last_tax = TaxonomyFactory.create( diff --git a/backend/metagenedb/apps/catalog/management/commands/create_light_db.py b/backend/metagenedb/apps/catalog/management/commands/create_light_db.py index 0945397ff10d331a1123a206536ce6951d32b74d..16bf6fa96ac8cd41645f469fd6813ba44c0f64b7 100644 --- a/backend/metagenedb/apps/catalog/management/commands/create_light_db.py +++ b/backend/metagenedb/apps/catalog/management/commands/create_light_db.py @@ -1,34 +1,25 @@ import logging +from random import randint from django.core.management.base import BaseCommand +from metagenedb.apps.catalog.factory import GeneFactory, GeneWithEggNOGFactory, GeneWithKeggFactory from metagenedb.apps.catalog.factory.taxonomy import generate_simple_db as gen_tax_db from metagenedb.apps.catalog.models import ( - Gene, KeggOrthology, Taxonomy + Gene, Function, Taxonomy +) +from metagenedb.apps.catalog.management.commands.compute_stats import ( + ComputeStatistics, ComputeCounts, ComputeGeneLength, ComputeTaxonomyRepartition, ComputeTaxonomyPresence ) logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s') -logger = logging.getLogger(__name__) - - -def create_functions_db(): - KeggOrthology.objects.all().delete() - keggs_to_create = { - "K03556": { - 'name': 'malT', - 'long_name': "LuxR family transcriptional regulator, maltose regulon positive regulatory protein" - }, - "K02229": { - 'name': "cobG", - 'long_name': 'precorrin-3B synthase [EC:1.14.13.83]' - } - } - for kegg_id, values in keggs_to_create.items(): - KeggOrthology( - function_id=kegg_id, - name=values.get('name'), - long_name=values.get('long_name') - ).save() +logger = logging.getLogger() + + +def empty_db(): + Gene.objects.all().delete() + Taxonomy.objects.all().delete() + Function.objects.all().delete() def create_taxonomy_db(): @@ -37,13 +28,30 @@ def create_taxonomy_db(): def create_genes_db(): - pass + Gene.objects.all().delete() + GeneFactory.create_batch(50) + GeneWithEggNOGFactory.create_batch(15) + GeneWithKeggFactory.create_batch(12) + for tax in Taxonomy.objects.all(): + GeneFactory.create_batch(randint(1, 10), taxonomy=tax) + GeneWithEggNOGFactory.create(taxonomy=tax) + GeneWithKeggFactory.create(taxonomy=tax) + + +def compute_stats(): + ComputeStatistics('all').clean_db() + for gene_source in ['all', 'virgo', 'igc']: + ComputeCounts(gene_source).all() + ComputeGeneLength(gene_source).all() + ComputeTaxonomyRepartition(gene_source).all() + ComputeTaxonomyPresence(gene_source).all() def create_small_db(): - create_functions_db() + empty_db() create_taxonomy_db() create_genes_db() + compute_stats() class Command(BaseCommand): diff --git a/backend/metagenedb/apps/catalog/models/taxonomy.py b/backend/metagenedb/apps/catalog/models/taxonomy.py index bd2bfe48466c6438ab1f20229e91dbaf2292f003..ffb2a12fd9c11b8b9d1cf7238be73fde4893484b 100644 --- a/backend/metagenedb/apps/catalog/models/taxonomy.py +++ b/backend/metagenedb/apps/catalog/models/taxonomy.py @@ -62,12 +62,13 @@ class Taxonomy(models.Model): Build and save parental hierarchy for an entry """ hierarchy = {} - if self.name != 'root' and self.parent is not None: + if self.name != 'root': hierarchy[self.rank] = { 'tax_id': self.tax_id, 'name': self.name } - hierarchy = {**hierarchy, **getattr(self.parent, 'hierarchy', self.parent.build_hierarchy())} + if self.parent is not None: + hierarchy = {**hierarchy, **getattr(self.parent, 'hierarchy', self.parent.build_hierarchy())} self.hierarchy = hierarchy self.save() return hierarchy diff --git a/backend/scripts/populate_db/test_import_igc_data.py b/backend/scripts/populate_db/test_import_igc_data.py index d43344693b235cc1430cc14de64fd806221ece72..1eeb0ef92c9b78f8ba72c7fd2b4f9b0ec7a55254 100644 --- a/backend/scripts/populate_db/test_import_igc_data.py +++ b/backend/scripts/populate_db/test_import_igc_data.py @@ -297,12 +297,12 @@ class TestBuildTaxoMapping(APITestCase): self.assertDictEqual(self.import_igc_genes.phylum_mapping, expected_phylum_dict) -class TestBuildBuildFunctionCatalog(APITestCase): +class TestBuildFunctionCatalog(APITestCase): @classmethod def setUpTestData(cls): - cls.keggs = KeggOrthologyFactory.create_batch(100) - cls.eggnogs = EggNOGFactory.create_batch(100) + cls.keggs = KeggOrthologyFactory.create_batch(10) + cls.eggnogs = EggNOGFactory.create_batch(10) def setUp(self): self.import_igc_genes = ImportIGCGenes('test', 'test_url', 'test_token')