Commit 77ae83bb authored by Kenzo-Hugo Hillion
Browse files

update backend with new hierarchy model

parent 112da486
Pipeline #21282 passed with stages
in 2 minutes and 45 seconds
from marshmallow.exceptions import ValidationError
from rest_framework.response import Response
from rest_framework.status import HTTP_422_UNPROCESSABLE_ENTITY
from metagenedb.api.catalog.filters import TaxonomyFilter
from metagenedb.api.catalog.qparams_validators.taxonomy import TaxonomyQueryParams
from metagenedb.apps.catalog.models import Taxonomy
......@@ -12,18 +8,8 @@ from .base import BulkViewSet
class TaxonomyViewSet(BulkViewSet):
queryset = Taxonomy.objects.select_related(
"parent", "superkingdom", "kingdom", "phylum", "class_rank", "order", "family", "genus", "species").all()
"parent").all()
serializer_class = TaxonomySerializer
lookup_field = 'tax_id'
filterset_class = TaxonomyFilter
query_params_parser = TaxonomyQueryParams
def retrieve(self, request, *args, **kwargs):
    """
    Return a single taxonomy entry after validating the query parameters.

    Responds with HTTP 422 and the normalized validation messages when the
    query parameters are rejected; otherwise responds with the serialized entry.
    """
    try:
        # Parsed only for validation; the parsed values are not used afterwards.
        self._get_qparams(request.query_params)
    except ValidationError as err:
        errors = err.normalized_messages()
        return Response(errors, status=HTTP_422_UNPROCESSABLE_ENTITY)
    taxon = self.get_object()
    # Read the property so the hierarchy is resolved before serialization;
    # the returned value itself is not needed here.
    _ = taxon.parental_hierarchy
    payload = self.get_serializer(taxon).data
    return Response(payload)
......@@ -3,13 +3,60 @@ from django_admin_listfilter_dropdown.filters import DropdownFilter
from metagenedb.apps.catalog.models import Taxonomy
# Names of the admin display callables ("get_<rank>"), one per taxonomic rank,
# ordered from the broadest rank to the most specific one.
RANK_DISPLAY = [
    f"get_{rank}"
    for rank in (
        'superkingdom',
        'kingdom',
        'phylum',
        'class',
        'order',
        'family',
        'genus',
        'species',
    )
]
@admin.register(Taxonomy)
class TaxonomyAdmin(admin.ModelAdmin):
list_display = (
'tax_id', 'name', 'rank', 'superkingdom',
'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species',
)
'tax_id', 'name', 'rank', 'get_parent',
) + tuple(RANK_DISPLAY)
list_filter = (('rank', DropdownFilter),)
search_fields = ('tax_id', 'name')
def get_parent(self, obj):
    """Return "<parent name> (<parent rank>)" for display, or '-' without a parent."""
    parent = obj.parent
    if not parent:
        return '-'
    return f"{parent.name} ({parent.rank})"
get_parent.short_description = 'Parent'
def _get_taxonomy(self, obj, rank):
    """
    Return the name stored for ``rank`` in ``obj.hierarchy`` as a string.

    Falls back to '-' when the hierarchy is empty or unset, when the rank has
    no entry (or a ``None`` entry), or when the entry carries no 'name' key.
    """
    hierarchy = obj.hierarchy
    if not hierarchy:
        return '-'
    entry = hierarchy.get(rank)
    if entry is None:
        return '-'
    return f"{entry.get('name', '-')}"
def get_superkingdom(self, obj):
    """Admin column: name recorded under 'superkingdom' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'superkingdom')
get_superkingdom.short_description = 'Superkingdom'
def get_kingdom(self, obj):
    """Admin column: name recorded under 'kingdom' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'kingdom')
get_kingdom.short_description = 'Kingdom'
def get_phylum(self, obj):
    """Admin column: name recorded under 'phylum' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'phylum')
get_phylum.short_description = 'Phylum'
def get_class(self, obj):
    """Admin column: name recorded under 'class' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'class')
get_class.short_description = 'Class'
def get_order(self, obj):
    """Admin column: name recorded under 'order' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'order')
get_order.short_description = 'Order'
def get_family(self, obj):
    """Admin column: name recorded under 'family' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'family')
get_family.short_description = 'Family'
def get_genus(self, obj):
    """Admin column: name recorded under 'genus' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'genus')
get_genus.short_description = 'Genus'
def get_species(self, obj):
    """Admin column: name recorded under 'species' in obj.hierarchy, or '-'."""
    return self._get_taxonomy(obj, 'species')
get_species.short_description = 'Species'
......@@ -4,6 +4,8 @@ from django.core.management.base import BaseCommand
from metagenedb.apps.catalog.models import Taxonomy
from metagenedb.common.utils.profiling import profile
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)
......@@ -19,11 +21,12 @@ class HierarchyBuilder:
self.hierarchy_built = 0
self.hierarchy_failed = 0
@profile('/Users/khillion/Sandbox/tax_only_many_parents.prof')
def build_all(self, chunk_size=8000, test=False):
logger.info("Building all hierarchy for all %s taxonomy items...", self.total_tax)
for taxonomy in self.queryset.iterator(chunk_size=chunk_size):
try:
hierarchy = taxonomy.parental_hierarchy # noqa
hierarchy = taxonomy.build_hierarchy() # noqa
self.hierarchy_built += 1
except Exception:
self.hierarchy_failed += 1
......@@ -49,10 +52,7 @@ class Command(BaseCommand):
logger.setLevel(logging.INFO)
def get_queryset(self):
    """Return all Taxonomy entries with the parent relation joined in the same query."""
    # Only the parent is joined: the per-rank relations (superkingdom ... species)
    # are replaced by the JSON `hierarchy` field and can no longer be selected.
    return Taxonomy.objects.select_related(SELECT_RELATED_PARENT).all()
def handle(self, *args, **options):
self.set_logger_level(int(options['verbosity']))
......
# Generated by Django 3.0.1 on 2020-01-02 16:42
import django.contrib.postgres.fields.jsonb
from django.db import migrations
class Migration(migrations.Migration):
    # Replaces the eight per-rank columns on `taxonomy` with a single nullable
    # JSON `hierarchy` column.

    dependencies = [
        ('catalog', '0021_index_gene_length'),
    ]

    operations = [
        # Drop the per-rank fields (one RemoveField per rank).
        migrations.RemoveField(
            model_name='taxonomy',
            name='class_rank',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='family',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='genus',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='kingdom',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='order',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='phylum',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='species',
        ),
        migrations.RemoveField(
            model_name='taxonomy',
            name='superkingdom',
        ),
        # Nullable, so existing rows start without a hierarchy.
        migrations.AddField(
            model_name='taxonomy',
            name='hierarchy',
            field=django.contrib.postgres.fields.jsonb.JSONField(null=True),
        ),
    ]
from django.db import models
from django.contrib.postgres.fields import JSONField
class Taxonomy(models.Model):
......@@ -51,81 +52,23 @@ class Taxonomy(models.Model):
on_delete=models.SET_NULL,
null=True, blank=True,
)
superkingdom = models.ForeignKey(
'Taxonomy', related_name='superkingdom_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
kingdom = models.ForeignKey(
'Taxonomy', related_name='kingdom_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
phylum = models.ForeignKey(
'Taxonomy', related_name='phylum_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
class_rank = models.ForeignKey(
'Taxonomy', related_name='class_children',
on_delete=models.SET_NULL,
null=True, blank=True,
verbose_name="class"
)
order = models.ForeignKey(
'Taxonomy', related_name='order_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
family = models.ForeignKey(
'Taxonomy', related_name='family_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
genus = models.ForeignKey(
'Taxonomy', related_name='genus_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
species = models.ForeignKey(
'Taxonomy', related_name='species_children',
on_delete=models.SET_NULL,
null=True, blank=True,
)
hierarchy = JSONField(null=True)
def __str__(self):
    """Return the entry's name as its string representation."""
    return "{}".format(self.name)
@property
def parental_hierarchy(self):
    """
    Return the parental hierarchy of this entry.

    Read from the stored rank attributes when a kingdom or superkingdom is
    set; otherwise built from the parents.
    """
    has_stored_ranks = self.kingdom is not None or self.superkingdom is not None
    if has_stored_ranks:
        return self._dict_parental_hierarchy()
    return self._build_parental_hierarchy()
def _dict_parental_hierarchy(self):
    """
    Return parental hierarchy from the rank attributes stored on this entry.

    Only ranks whose attribute exists and is not None are included; keys
    follow the order superkingdom -> species.
    """
    rank_names = (
        "superkingdom", "kingdom", "phylum", "class_rank", "order", "family", "genus", "species",
    )
    candidates = ((rank, getattr(self, rank, None)) for rank in rank_names)
    return {rank: value for rank, value in candidates if value is not None}
def build_hierarchy(self):
    """
    Build and save parental hierarchy for an entry

    The hierarchy maps a rank to ``{'tax_id': ..., 'name': ...}`` for this
    entry and each of its ancestors. An entry named 'root', or one without a
    parent, gets an empty hierarchy. The result is stored in ``self.hierarchy``,
    saved, and returned.

    The parent's stored hierarchy is reused when present; the parent is only
    (re)built, recursively, when it has none yet.
    """
    hierarchy = {}
    if self.name != 'root' and self.parent is not None:
        hierarchy[self.rank] = {
            'tax_id': self.tax_id,
            'name': self.name
        }
        # Look the parent's hierarchy up first and build it only on a miss.
        # Passing the build call as a getattr() default would evaluate it on
        # every call, rebuilding and re-saving the whole chain of ancestors.
        parent_hierarchy = getattr(self.parent, 'hierarchy', None)
        if parent_hierarchy is None:
            parent_hierarchy = self.parent.build_hierarchy()
        # NOTE(review): on a rank collision the ancestor's entry wins over this
        # entry's own one (same merge order as before) - confirm this is intended.
        hierarchy = {**hierarchy, **parent_hierarchy}
    self.hierarchy = hierarchy
    self.save()
    return hierarchy
......
......@@ -5,37 +5,42 @@ from metagenedb.apps.catalog.factory import TaxonomyFactory
class TestBuildHierarchy(APITestCase):
def setUp(self):
@classmethod
def setUpTestData(cls):
"""
Build some test data for different tests
"""
self.root = TaxonomyFactory.create(
cls.root = TaxonomyFactory.create(
tax_id="1",
name="root",
rank="no_rank",
)
self.kingdom = TaxonomyFactory(
cls.kingdom = TaxonomyFactory(
tax_id="2",
name="KINGDOM",
rank="kingdom",
parent=self.root
parent=cls.root
)
self.phylum = TaxonomyFactory(
cls.phylum = TaxonomyFactory(
tax_id="3",
name="PHYLUM",
rank="phylum",
parent=self.kingdom
parent=cls.kingdom
)
def test_build_hierarchy(self):
expected_dict = {
'phylum': self.phylum,
'kingdom': self.kingdom
'phylum': {
'tax_id': self.phylum.tax_id,
'name': self.phylum.name
},
'kingdom': {
'tax_id': self.kingdom.tax_id,
'name': self.kingdom.name
}
}
self.assertNotEqual(getattr(self.phylum, 'kingdom', None), self.kingdom)
test_dict = self.phylum.parental_hierarchy
self.assertDictEqual(test_dict, expected_dict)
self.assertEqual(getattr(self.phylum, 'kingdom', None), self.kingdom)
# Now try a second time from saved information
test_dict = self.phylum.parental_hierarchy
self.assertIsNone(getattr(self.phylum, 'hierarchy'))
test_dict = self.phylum.build_hierarchy()
self.assertDictEqual(test_dict, expected_dict)
self.assertIsNotNone(getattr(self.phylum, 'hierarchy'))
self.assertDictEqual(getattr(self.phylum, 'hierarchy'), expected_dict)
......@@ -41,42 +41,12 @@ class GeneStatistics(Statistics):
}
return self.get_queryset(filters=filters).distinct().count()
def gene_length(self, window_size=1000, stop_at=10000, filters=None):
    """
    Count how many gene by window of gene length.

    Genes are counted in consecutive windows ``[start, start + window_size)``
    starting at 0; the last window is widened to include the longest gene, so
    it holds everything from its start upwards.

    :param window_size: width of one window, in the unit of ``length``
    :param stop_at: start of the last window (capped at the maximum length found)
    :param filters: optional filters forwarded to ``get_queryset``
    :return: dict with 'counts' (one count per window) and 'labels' (one
        label per window, expressed in thousands); both lists are empty when
        there is nothing to count
    """
    if not self.get_queryset().exists():
        return {
            'counts': [],
            'labels': []
        }
    if filters is None:
        queryset = self.get_queryset().only('length')
    else:
        queryset = self.get_queryset(filters=filters).distinct().only('length')
    # aggregate() always returns the 'length__max' key; its value is None when
    # the (filtered) queryset matches no length, so a .get() default never applies.
    length_max = queryset.aggregate(Max('length')).get('length__max')
    if length_max is None:
        return {
            'counts': [],
            'labels': []
        }
    stop_at = length_max if length_max < stop_at else stop_at
    all_ranges = [[i, i + window_size] for i in range(0, stop_at + 1, window_size)]
    all_ranges[-1][1] = length_max + 1  # last should contain all above the stop_at
    data = []
    labels = []
    for rg in all_ranges:
        labels.append(f"{rg[0]/1000}k-{rg[1]/1000}k")
        data.append(queryset.filter(length__gte=rg[0], length__lt=rg[1]).count())
    # Change labels: first window shown as "<upper bound", last one as ">lower bound"
    labels[0] = f"<{labels[0].split('-')[1]}"
    labels[-1] = f">{labels[-1].split('-')[0]}"
    return {
        'counts': data,
        'labels': labels
    }
def taxonomy_repartition(self, level="phylum"):
level = "class_rank" if level == "class" else level
queryset = self.get_queryset().select_related(f'taxonomy__{level}')
filter_no_annotation = {f"taxonomy__{level}__isnull": True}
filter_annotation = {f"taxonomy__{level}__isnull": False}
value_to_retrieve = f'taxonomy__{level}__name'
filter_no_annotation = {f"taxonomy__hierarchy__{level}__isnull": True}
filter_annotation = {f"taxonomy__hierarchy__{level}__isnull": False}
value_to_retrieve = f'taxonomy__hierarchy__{level}__name'
taxonomy_counts = defaultdict(lambda: 0)
taxonomy_counts['No annotation'] = queryset.filter(**filter_no_annotation).values().count()
if taxonomy_counts['No annotation'] == 0:
......
......@@ -15,6 +15,10 @@ class BaseTestGeneStatistics(APITestCase):
class TestTaxonomyRepartition(BaseTestGeneStatistics):
@classmethod
def setUpTestData(cls):
cls.parent_root = TaxonomyFactory(rank="root")
def test_taxonomy_counts_no_content(self):
expected_dict = {
'labels': [],
......@@ -26,8 +30,9 @@ class TestTaxonomyRepartition(BaseTestGeneStatistics):
def test_taxonomy_repartition(self):
tax_name = "TaxTest"
taxonomy = TaxonomyFactory(rank='phylum', name=tax_name)
taxonomy.phylum = taxonomy # link taxonomy to itself as phylum
taxonomy.parent = self.parent_root
taxonomy.save()
taxonomy.build_hierarchy()
gene = GeneFactory.create(taxonomy=taxonomy) # noqa
expected_dict = {
'labels': [tax_name],
......@@ -39,8 +44,9 @@ class TestTaxonomyRepartition(BaseTestGeneStatistics):
def test_taxonomy_counts_class_level(self):
tax_name = "TaxTest"
taxonomy = TaxonomyFactory(rank='class_rank', name=tax_name)
taxonomy.class_rank = taxonomy # link taxonomy to itself as phylum
taxonomy.parent = self.parent_root
taxonomy.save()
taxonomy.build_hierarchy()
gene = GeneFactory.create(taxonomy=taxonomy) # noqa
expected_dict = {
'labels': [tax_name],
......
......@@ -27,59 +27,10 @@ class TaxonomySerializer(serializers.ModelSerializer):
source='parent',
required=False,
)
superkingdom = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
kingdom = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
phylum = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
class_rank = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
order = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
family = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
genus = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
species = AsymetricSlugRelatedField.from_serializer(
SimpleTaxonomySerializer,
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
class Meta:
    # Serializer options for Taxonomy entries.
    model = Taxonomy
    list_serializer_class = TaxonomyListSerializer
    # Ancestors are exposed through the single `hierarchy` field; each field
    # name is listed once.
    fields = (
        'tax_id', 'name', 'rank', 'parent_tax_id', 'hierarchy'
    )
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment