Commit 5ae2909a authored by Kenzo-Hugo Hillion
Browse files

Merge branch '23-taxonomy-model' into 'master'

add superkingdom and improve performances

Closes #23

See merge request !4
parents 365da9d0 bee34ed2
Pipeline #13265 passed with stage
in 1 minute and 19 seconds
......@@ -7,7 +7,7 @@ from metagenedb.apps.catalog.models import Taxonomy
class TaxonomyAdmin(admin.ModelAdmin):
list_display = (
'tax_id', 'name', 'rank',
'tax_id', 'name', 'rank', 'superkingdom',
'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species',
)
search_fields = ('tax_id', 'name')
# Generated by Django 2.2.1 on 2019-07-18 09:34
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add the nullable, self-referencing ``superkingdom`` foreign key to the ``taxonomy`` model."""

    # Must run after the previous migration of the ``catalog`` app.
    dependencies = [('catalog', '0003_auto_20190717_1551')]

    operations = [
        migrations.AddField(
            model_name='taxonomy',
            name='superkingdom',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='superkingdom_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]
......@@ -52,6 +52,11 @@ class Taxonomy(models.Model):
null=True, blank=True,
)
superkingdom = models.ForeignKey(
'Taxonomy', related_name='superkingdom_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
kingdom = models.ForeignKey(
'Taxonomy', related_name='kingdom_children',
on_delete=models.SET_NULL,
......@@ -74,7 +79,7 @@ class Taxonomy(models.Model):
null=True, blank=True,
)
family = models.ForeignKey(
'Taxonomy', related_name='familyphy_children',
'Taxonomy', related_name='family_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
......
......@@ -10,6 +10,11 @@ class TaxonomySerializer(serializers.ModelSerializer):
source='parent',
required=False,
)
superkingdom = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
kingdom = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
......@@ -49,6 +54,6 @@ class TaxonomySerializer(serializers.ModelSerializer):
class Meta:
model = Taxonomy
fields = (
'tax_id', 'name', 'rank', 'parent_tax_id',
'tax_id', 'name', 'rank', 'parent_tax_id', 'superkingdom',
'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species',
)
......@@ -18,6 +18,8 @@ from metagenedb.apps.catalog.serializers import TaxonomySerializer # noqa
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
# Lookup path that follows the ``parent`` relation 41 levels deep:
# "parent" followed by 40 "__parent" hops. It is passed to select_related()
# in build_all_hierarchy.
SELECT_RELATED_PARENT = "parent{}".format("__parent" * 40)
def import_names(taxonomy_names_file, select_class="scientific name"):
"""
......@@ -62,21 +64,30 @@ def update_taxo_nodes(taxonomy_nodes_file):
_LOGGER.warning(f"Invalid data: {serializer.errors}. Link to parent skipped. Data: {serializer.data}")
def build_hierarchy():
def _build_hierarchy(taxo):
    """
    Compute the parental hierarchy of one taxonomy entry and save it.

    The hierarchy is obtained from ``taxo.build_parental_hierarchy()``; its
    'class' key, when present, is renamed to 'class_rank' before the data is
    given to ``TaxonomySerializer``. When validation fails nothing is saved
    and a warning is logged instead.
    """
    ranks = taxo.build_parental_hierarchy()
    if 'class' in ranks:
        # The serializer exposes this rank under the name 'class_rank'.
        ranks['class_rank'] = ranks.pop('class')
    updater = TaxonomySerializer(taxo, ranks)
    if not updater.is_valid():
        _LOGGER.warning(f"Invalid data: {updater.errors}. Building hierarchy skipped. Data: {updater.data}")
        return
    updater.save()
def build_all_hierarchy(chunk_size=8000):
    """
    Build and save the parental hierarchy of every Taxonomy entry.

    Entries are streamed from the database with ``QuerySet.iterator`` using
    ``chunk_size`` as its chunk size. The chain of parents is joined in
    through ``select_related(SELECT_RELATED_PARENT)``, and each entry is
    handed to ``_build_hierarchy``. Progress is logged every 10000 entries.
    """
    _LOGGER.info("Linking taxonomy objects to parental nodes from direct parental nodes...")
    all_taxo = Taxonomy.objects.select_related(SELECT_RELATED_PARENT).all()
    # Count once up front: calling count() inside the loop would evaluate it
    # again for every progress message.
    total = all_taxo.count()
    for cpt, taxo in enumerate(all_taxo.iterator(chunk_size=chunk_size), start=1):
        _build_hierarchy(taxo)
        if cpt % 10000 == 0:
            _LOGGER.info(f"{cpt}/{total} hierarchies built...")
def parse_arguments():
......@@ -99,7 +110,7 @@ def run():
taxonomy_names = import_names(args.names)
create_taxo_nodes(args.nodes, taxonomy_names)
update_taxo_nodes(args.nodes)
build_hierarchy()
build_all_hierarchy()
if __name__ == "__main__":
......
......@@ -20,6 +20,7 @@ services:
- main
db:
shm_size: '2gb'
container_name: db
image: postgres:latest
ports:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment