Commit 2cc71fd9 authored by Kenzo-Hugo Hillion
Browse files

finish command to generate light db

parent 931f67e8
Pipeline #33341 passed with stages
in 3 minutes and 14 seconds
from factory import DjangoModelFactory, fuzzy
from faker import Factory
from factory import DjangoModelFactory, Faker, fuzzy
from metagenedb.apps.catalog import models
from .fuzzy_base import FuzzyLowerText
faker = Factory.create()
SELECTED_SOURCE = [i[0] for i in models.Function.SOURCE_CHOICES]
EGGNOG_VERSIONS = [i[0] for i in models.EggNOG.VERSION_CHOICES]
class BaseFunctionFactory(DjangoModelFactory):
function_id = FuzzyLowerText(prefix='function-', length=15)
class FunctionFactory(BaseFunctionFactory):
class FunctionFactory(DjangoModelFactory):
class Meta:
model = models.Function
function_id = Faker('bothify', text='function-####')
source = fuzzy.FuzzyChoice(SELECTED_SOURCE)
function_id = FuzzyLowerText(prefix='function-', length=15)
class EggNOGFactory(BaseFunctionFactory):
class EggNOGFactory(DjangoModelFactory):
function_id = Faker('bothify', text='COG####')
name = Faker('bothify', text='COG-????????')
class Meta:
model = models.EggNOG
version = fuzzy.FuzzyChoice(EGGNOG_VERSIONS)
class KeggOrthologyFactory(BaseFunctionFactory):
class KeggOrthologyFactory(DjangoModelFactory):
function_id = Faker('bothify', text='K0####')
class Meta:
model = models.KeggOrthology
......
from factory import (
DjangoModelFactory, RelatedFactory, SubFactory, fuzzy
DjangoModelFactory, Faker, RelatedFactory, SubFactory, fuzzy
)
from faker import Factory
from metagenedb.apps.catalog import models
from .fuzzy_base import FuzzyLowerText
from .function import FunctionFactory
from .function import FunctionFactory, KeggOrthologyFactory, EggNOGFactory
from .taxonomy import TaxonomyFactory
faker = Factory.create()
GENE_SOURCES = [i[0] for i in models.Gene.SOURCE_CHOICES]
......@@ -18,9 +14,9 @@ class GeneFactory(DjangoModelFactory):
class Meta:
model = models.Gene
gene_id = FuzzyLowerText(prefix='gene-', length=15)
name = fuzzy.FuzzyText(prefix='name-', length=15)
length = fuzzy.FuzzyInteger(200, 10000)
gene_id = Faker('bothify', text='gene-?#?#??-#??#?#')
name = Faker('bothify', text='Gene_name-##-????')
length = Faker('pyint', min_value=200, max_value=4200)
source = fuzzy.FuzzyChoice(GENE_SOURCES)
......@@ -36,9 +32,17 @@ class GeneFunctionFactory(DjangoModelFactory):
function = SubFactory(FunctionFactory)
# GeneFunctionFactory variant whose `function` is built by KeggOrthologyFactory
# instead of the base class's FunctionFactory.
class GeneKeggFactory(GeneFunctionFactory):
function = SubFactory(KeggOrthologyFactory)
# GeneFunctionFactory variant whose `function` is built by EggNOGFactory
# instead of the base class's FunctionFactory.
class GeneEggNOGFactory(GeneFunctionFactory):
function = SubFactory(EggNOGFactory)
class GeneWithKeggFactory(GeneFactory):
kegg = RelatedFactory(GeneFunctionFactory, 'gene', function__source='kegg')
kegg = RelatedFactory(GeneKeggFactory, 'gene')
class GeneWithEggNOGFactory(GeneFactory):
eggnog = RelatedFactory(GeneFunctionFactory, 'gene', function__source='eggnog')
eggnog = RelatedFactory(GeneEggNOGFactory, 'gene')
......@@ -30,6 +30,7 @@ class DbGenerator:
"""
Tree needs to be an OrderedDict ordered from higher to lower level
"""
self.last_tax = None
for rank, desc in tree.items():
if desc['tax_id'] not in self.created_ids:
self.last_tax = TaxonomyFactory.create(
......
import logging
from random import randint
from django.core.management.base import BaseCommand
from metagenedb.apps.catalog.factory import GeneFactory, GeneWithEggNOGFactory, GeneWithKeggFactory
from metagenedb.apps.catalog.factory.taxonomy import generate_simple_db as gen_tax_db
from metagenedb.apps.catalog.models import (
Gene, KeggOrthology, Taxonomy
Gene, Function, Taxonomy
)
from metagenedb.apps.catalog.management.commands.compute_stats import (
ComputeStatistics, ComputeCounts, ComputeGeneLength, ComputeTaxonomyRepartition, ComputeTaxonomyPresence
)
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)
def create_functions_db():
    """Delete all KeggOrthology rows, then save two hard-coded KEGG entries.

    The entries are keyed by their ``function_id`` (``K03556`` and
    ``K02229``); each one carries a ``name`` and a ``long_name``.
    """
    KeggOrthology.objects.all().delete()
    kegg_fields_by_id = {
        "K03556": {
            'name': 'malT',
            'long_name': "LuxR family transcriptional regulator, maltose regulon positive regulatory protein"
        },
        "K02229": {
            'name': "cobG",
            'long_name': 'precorrin-3B synthase [EC:1.14.13.83]'
        }
    }
    for function_id, fields in kegg_fields_by_id.items():
        # Every entry defines exactly `name` and `long_name`, so the mapping
        # can be unpacked straight into the model constructor.
        entry = KeggOrthology(function_id=function_id, **fields)
        entry.save()
logger = logging.getLogger()
def empty_db():
    """Delete every Gene, Taxonomy and Function row, in that order."""
    for model in (Gene, Taxonomy, Function):
        model.objects.all().delete()
def create_taxonomy_db():
......@@ -37,13 +28,30 @@ def create_taxonomy_db():
def create_genes_db():
pass
Gene.objects.all().delete()
GeneFactory.create_batch(50)
GeneWithEggNOGFactory.create_batch(15)
GeneWithKeggFactory.create_batch(12)
for tax in Taxonomy.objects.all():
GeneFactory.create_batch(randint(1, 10), taxonomy=tax)
GeneWithEggNOGFactory.create(taxonomy=tax)
GeneWithKeggFactory.create(taxonomy=tax)
def compute_stats():
    """Run every statistics computation for each gene source.

    First calls ``clean_db()`` on ``ComputeStatistics('all')`` (presumably
    clearing previously stored statistics -- confirm against compute_stats).
    Then, for each of the sources ``'all'``, ``'virgo'`` and ``'igc'``, calls
    ``.all()`` on the counts, gene-length, taxonomy-repartition and
    taxonomy-presence computations, in that order.
    """
    ComputeStatistics('all').clean_db()
    computations = (
        ComputeCounts,
        ComputeGeneLength,
        ComputeTaxonomyRepartition,
        ComputeTaxonomyPresence,
    )
    for gene_source in ('all', 'virgo', 'igc'):
        for computation in computations:
            computation(gene_source).all()
def create_small_db():
create_functions_db()
empty_db()
create_taxonomy_db()
create_genes_db()
compute_stats()
class Command(BaseCommand):
......
......@@ -62,12 +62,13 @@ class Taxonomy(models.Model):
Build and save parental hierarchy for an entry
"""
hierarchy = {}
if self.name != 'root' and self.parent is not None:
if self.name != 'root':
hierarchy[self.rank] = {
'tax_id': self.tax_id,
'name': self.name
}
hierarchy = {**hierarchy, **getattr(self.parent, 'hierarchy', self.parent.build_hierarchy())}
if self.parent is not None:
hierarchy = {**hierarchy, **getattr(self.parent, 'hierarchy', self.parent.build_hierarchy())}
self.hierarchy = hierarchy
self.save()
return hierarchy
......
......@@ -297,12 +297,12 @@ class TestBuildTaxoMapping(APITestCase):
self.assertDictEqual(self.import_igc_genes.phylum_mapping, expected_phylum_dict)
class TestBuildBuildFunctionCatalog(APITestCase):
class TestBuildFunctionCatalog(APITestCase):
@classmethod
def setUpTestData(cls):
cls.keggs = KeggOrthologyFactory.create_batch(100)
cls.eggnogs = EggNOGFactory.create_batch(100)
cls.keggs = KeggOrthologyFactory.create_batch(10)
cls.eggnogs = EggNOGFactory.create_batch(10)
def setUp(self):
self.import_igc_genes = ImportIGCGenes('test', 'test_url', 'test_token')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment