Commit eb644605 authored by Kenzo-Hugo Hillion
Browse files

Start script to create a light DB

parent fce01755
Pipeline #33212 failed with stages
in 3 minutes and 4 seconds
from collections import OrderedDict
from factory import DjangoModelFactory, fuzzy
from faker import Factory
......@@ -25,44 +27,47 @@ class DbGenerator:
self.created_ids = set() # store already created IDs to skip them
def generate_db_from_tree(self, tree):
    """
    Create the taxonomy entries described by ``tree`` and link each to its parent.

    ``tree`` needs to be an OrderedDict from higher to lower level: keys are
    rank names, values are dicts holding the ``name`` and ``tax_id`` of the
    node at that rank.

    Entries whose ``tax_id`` is already in ``self.created_ids`` are not
    created again, but they still become the parent of the next entry of
    ``tree``. A lineage that shares its upper ranks with one generated
    earlier is therefore attached to the shared ancestor rather than to the
    last node produced by the previous call.
    """
    # Created objects indexed by tax_id. Set up lazily so that no change to
    # the constructor is required.
    nodes_by_id = getattr(self, "_nodes_by_id", None)
    if nodes_by_id is None:
        nodes_by_id = self._nodes_by_id = {}

    parent = None
    for rank, desc in tree.items():
        tax_id = desc['tax_id']
        if tax_id in self.created_ids:
            # Already created: skip it but keep walking down through it.
            # If the object is not known here (ID registered by other code),
            # the current parent is kept.
            parent = nodes_by_id.get(tax_id, parent)
            continue
        self.last_tax = TaxonomyFactory.create(
            tax_id=tax_id,
            name=desc['name'],
            rank=rank,
            parent=parent,
        )
        self.created_ids.add(tax_id)
        nodes_by_id[tax_id] = self.last_tax
        # NOTE(review): the original indentation of this call was lost; it is
        # run for every newly created node here - confirm this matches the
        # intended use of build_hierarchy().
        self.last_tax.build_hierarchy()
        parent = self.last_tax
def _generate_lactobacillus_db(db_generator):
    """
    Generate db with few ranks corresponding to Lactobacillus genus

    The lineage is built as an OrderedDict going from the root down to the
    species group, then handed to ``db_generator.generate_db_from_tree``.
    """
    # Order matters: entries must go from the highest to the lowest rank.
    tree = OrderedDict([
        ("no_rank", {"name": "root", "tax_id": "1"}),
        ("superkingdom", {"name": "Bacteria", "tax_id": "2"}),
        ("phylum", {"name": "Firmicutes", "tax_id": "1239"}),
        ("class", {"name": "Bacilli", "tax_id": "91061"}),
        ("order", {"name": "Lactobacillales", "tax_id": "186826"}),
        ("family", {"name": "Lactobacillaceae", "tax_id": "33958"}),
        ("genus", {"name": "Lactobacillus", "tax_id": "1578"}),
        ("species_group", {"name": "Lactobacillus casei group", "tax_id": "655183"}),
    ])
    db_generator.generate_db_from_tree(tree)
def _generate_escherichia_db(db_generator):
    """
    Generate db with few ranks corresponding to Escherichia coli species

    The lineage is built as an OrderedDict going from the root down to the
    species, then handed to ``db_generator.generate_db_from_tree``.
    """
    # Order matters: entries must go from the highest to the lowest rank.
    tree = OrderedDict([
        ("no_rank", {"name": "root", "tax_id": "1"}),
        ("superkingdom", {"name": "Bacteria", "tax_id": "2"}),
        ("phylum", {"name": "Proteobacteria", "tax_id": "1224"}),
        ("class", {"name": "Gammaproteobacteria", "tax_id": "1236"}),
        ("order", {"name": "Enterobacterales", "tax_id": "91347"}),
        ("family", {"name": "Enterobacteriaceae", "tax_id": "543"}),
        ("genus", {"name": "Escherichia", "tax_id": "561"}),
        ("species", {"name": "Escherichia coli", "tax_id": "562"}),
    ])
    db_generator.generate_db_from_tree(tree)
......
import logging
from django.core.management.base import BaseCommand
from metagenedb.apps.catalog.factory.taxonomy import generate_simple_db as gen_tax_db
from metagenedb.apps.catalog.models import (
Gene, KeggOrthology, Taxonomy
)
# Configure the root logger's output format: timestamp, level, logger name, message.
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
# Module-level logger; its level is adjusted by Command.set_logger_level.
logger = logging.getLogger(__name__)
def create_functions_db():
    """Delete every KeggOrthology entry, then store a fixed pair of KEGG entries."""
    KeggOrthology.objects.all().delete()
    # KEGG identifier -> short and long names of the entry to create.
    kegg_entries = {
        "K03556": {
            'name': 'malT',
            'long_name': "LuxR family transcriptional regulator, maltose regulon positive regulatory protein"
        },
        "K02229": {
            'name': "cobG",
            'long_name': 'precorrin-3B synthase [EC:1.14.13.83]'
        }
    }
    for function_id in kegg_entries:
        details = kegg_entries[function_id]
        entry = KeggOrthology(
            function_id=function_id,
            name=details.get('name'),
            long_name=details.get('long_name')
        )
        entry.save()
def create_taxonomy_db():
    """Delete every Taxonomy entry, then regenerate them with ``gen_tax_db``."""
    existing_entries = Taxonomy.objects.all()
    existing_entries.delete()
    gen_tax_db()
def create_genes_db():
    """Placeholder for the gene creation step; it currently does nothing."""
    return None
def create_small_db():
    """Run the creation steps in order: functions, taxonomy, then genes."""
    creation_steps = (create_functions_db, create_taxonomy_db, create_genes_db)
    for run_step in creation_steps:
        run_step()
class Command(BaseCommand):
    """Management command that builds the light database via ``create_small_db``."""

    help = 'Create a light DB with random items to illustrate functionnalities of the application.'

    def set_logger_level(self, verbosity):
        """
        Set the module logger level from the numeric ``verbosity``.

        Above 2 selects DEBUG, exactly 2 selects INFO; lower values leave the
        logger level untouched.
        """
        if verbosity > 2:
            level = logging.DEBUG
        elif verbosity > 1:
            level = logging.INFO
        else:
            return
        logger.setLevel(level)

    def handle(self, *args, **options):
        """Apply the requested verbosity, then create the light database."""
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        create_small_db()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment