diff --git a/backend/metagenedb/apps/catalog/admin/taxonomy.py b/backend/metagenedb/apps/catalog/admin/taxonomy.py index cb5857ef338bda64c56f8b4d4f2f0956a0a9f3de..79090d33522e6487f19714f3f769881205ca9d2b 100644 --- a/backend/metagenedb/apps/catalog/admin/taxonomy.py +++ b/backend/metagenedb/apps/catalog/admin/taxonomy.py @@ -6,5 +6,8 @@ from metagenedb.apps.catalog.models import Taxonomy @admin.register(Taxonomy) class TaxonomyAdmin(admin.ModelAdmin): - list_display = ('tax_id', 'name', 'rank', 'parent') + list_display = ( + 'tax_id', 'name', 'rank', + 'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species', + ) search_fields = ('tax_id', 'name') diff --git a/backend/metagenedb/apps/catalog/migrations/0003_auto_20190717_1551.py b/backend/metagenedb/apps/catalog/migrations/0003_auto_20190717_1551.py new file mode 100644 index 0000000000000000000000000000000000000000..d54074d3fc529e3dcdcce3e8d8185431ab73e9c8 --- /dev/null +++ b/backend/metagenedb/apps/catalog/migrations/0003_auto_20190717_1551.py @@ -0,0 +1,54 @@ +# Generated by Django 2.2.1 on 2019-07-17 13:51 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('catalog', '0002_taxonomy'), + ] + + operations = [ + migrations.AddField( + model_name='taxonomy', + name='class_rank', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='class_children', to='catalog.Taxonomy', verbose_name='class'), + ), + migrations.AddField( + model_name='taxonomy', + name='family', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='family_children', to='catalog.Taxonomy'), + ), + migrations.AddField( + model_name='taxonomy', + name='genus', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='genus_children', to='catalog.Taxonomy'), + ), + migrations.AddField( 
+ model_name='taxonomy', + name='kingdom', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='kingdom_children', to='catalog.Taxonomy'), + ), + migrations.AddField( + model_name='taxonomy', + name='order', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='order_children', to='catalog.Taxonomy'), + ), + migrations.AddField( + model_name='taxonomy', + name='phylum', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='phylum_children', to='catalog.Taxonomy'), + ), + migrations.AddField( + model_name='taxonomy', + name='species', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='species_children', to='catalog.Taxonomy'), + ), + migrations.AlterField( + model_name='taxonomy', + name='parent', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='direct_children', to='catalog.Taxonomy'), + ), + ] diff --git a/backend/metagenedb/apps/catalog/models/taxonomy.py b/backend/metagenedb/apps/catalog/models/taxonomy.py index 3f6fc43a62dc37fbaf0025639af3cb2dda9dac2c..ccd113596bbfcbcd3c3068331afe3d54ebd933ab 100644 --- a/backend/metagenedb/apps/catalog/models/taxonomy.py +++ b/backend/metagenedb/apps/catalog/models/taxonomy.py @@ -47,8 +47,44 @@ class Taxonomy(models.Model): name = models.CharField(max_length=200, default=NAME_DEFAULT) rank = models.CharField(max_length=20, choices=RANK_CHOICES) parent = models.ForeignKey( - 'Taxonomy', - related_name='children', + 'Taxonomy', related_name='direct_children', + on_delete=models.SET_NULL, + null=True, blank=True, + ) + + kingdom = models.ForeignKey( + 'Taxonomy', related_name='kingdom_children', + on_delete=models.SET_NULL, + null=True, blank=True, + ) + phylum = models.ForeignKey( + 'Taxonomy', related_name='phylum_children', + 
on_delete=models.SET_NULL, + null=True, blank=True, + ) + class_rank = models.ForeignKey( + 'Taxonomy', related_name='class_children', + on_delete=models.SET_NULL, + null=True, blank=True, + verbose_name="class" + ) + order = models.ForeignKey( + 'Taxonomy', related_name='order_children', + on_delete=models.SET_NULL, + null=True, blank=True, + ) + family = models.ForeignKey( + 'Taxonomy', related_name='family_children', + on_delete=models.SET_NULL, + null=True, blank=True, + ) + genus = models.ForeignKey( + 'Taxonomy', related_name='genus_children', + on_delete=models.SET_NULL, + null=True, blank=True, + ) + species = models.ForeignKey( + 'Taxonomy', related_name='species_children', on_delete=models.SET_NULL, null=True, blank=True, ) @@ -56,5 +92,13 @@ class Taxonomy(models.Model): def __str__(self): return f"{self.name}" + def build_parental_hierarchy(self): + hierarchy = {} + if self.name != 'root' and self.parent is not None: + hierarchy[self.rank] = self.tax_id + hierarchy = {**hierarchy, **self.parent.build_parental_hierarchy()} + hierarchy['tax_id'] = self.tax_id + return hierarchy + class Meta: verbose_name_plural = "Taxonomy" diff --git a/backend/metagenedb/apps/catalog/models/test_taxonomy.py b/backend/metagenedb/apps/catalog/models/test_taxonomy.py new file mode 100644 index 0000000000000000000000000000000000000000..141485119c0b315d64d856c17bd9e52c88a46f3a --- /dev/null +++ b/backend/metagenedb/apps/catalog/models/test_taxonomy.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +from .taxonomy import Taxonomy + + +class TestBuildHierarchy(TestCase): + + @classmethod + def setUpClass(cls): + """ + Build some test data for different tests + """ + cls.root = Taxonomy( + tax_id="1", + name="root", + rank="no_rank", + ) + cls.kingdom = Taxonomy( + tax_id="2", + name="KINGDOM", + rank="kingdom", + parent=cls.root + ) + cls.phylum = Taxonomy( + tax_id="3", + name="PHYLUM", + rank="phylum", + parent=cls.kingdom + ) + + def test_build_hierarchy(self): + 
expected_dict = { + 'tax_id': '3', + 'phylum': '3', + 'kingdom': '2' + } + test_dict = self.phylum.build_parental_hierarchy() + self.assertDictEqual(test_dict, expected_dict) diff --git a/backend/metagenedb/apps/catalog/serializers/taxonomy.py b/backend/metagenedb/apps/catalog/serializers/taxonomy.py index 6c6b4e22d38fb218d17edd250851c5640c3689ee..87816c32c8b34c1ad5b90c524830e91799efe8e4 100644 --- a/backend/metagenedb/apps/catalog/serializers/taxonomy.py +++ b/backend/metagenedb/apps/catalog/serializers/taxonomy.py @@ -3,13 +3,52 @@ from metagenedb.apps.catalog.models import Taxonomy class TaxonomySerializer(serializers.ModelSerializer): + rank = serializers.CharField(required=False) parent_tax_id = serializers.SlugRelatedField( queryset=Taxonomy.objects.all(), slug_field='tax_id', source='parent', + required=False, + ) + kingdom = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + phylum = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + class_rank = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + order = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + family = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + genus = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', + required=False + ) + species = serializers.SlugRelatedField( + queryset=Taxonomy.objects.all(), + slug_field='tax_id', required=False - ) + ) class Meta: model = Taxonomy - fields = ('tax_id', 'name', 'rank', 'parent_tax_id') + fields = ( + 'tax_id', 'name', 'rank', 'parent_tax_id', + 'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species', + ) diff --git a/backend/scripts/populate_db/import_ncbi_taxonomy.py 
b/backend/scripts/populate_db/import_ncbi_taxonomy.py index 351b87e761f41fb86671e825829c908d472bbc11..3206c02fd85e41e39435f261de36e4852e25ab37 100755 --- a/backend/scripts/populate_db/import_ncbi_taxonomy.py +++ b/backend/scripts/populate_db/import_ncbi_taxonomy.py @@ -50,7 +50,7 @@ def create_taxo_nodes(taxonomy_nodes_file, taxo_name_dict): def update_taxo_nodes(taxonomy_nodes_file): - _LOGGER.info(f"Linking taxonomy objects to parental nodes from {taxonomy_nodes_file}...") + _LOGGER.info(f"Linking taxonomy objects to direct parental node from {taxonomy_nodes_file}...") with open(taxonomy_nodes_file, "r") as file: for i in file: node = NCBITaxonomyLineParser.node(i) @@ -59,7 +59,24 @@ def update_taxo_nodes(taxonomy_nodes_file): if serializer.is_valid(): serializer.save() else: - _LOGGER.warning(f"Invalid data: {serializer.errors}. Insertion skipped. Data: {serializer.data}") + _LOGGER.warning(f"Invalid data: {serializer.errors}. Link to parent skipped. Data: {serializer.data}") + + +def build_hierarchy(): + """ + Uses class method from Taxonomy model to retrieve the parental hierarchy and + assign corresponding attribute to each entry. + """ + _LOGGER.info("Linking taxonomy objects to parental nodes from direct parental nodes...") + for taxo in Taxonomy.objects.all(): + hierarchy = taxo.build_parental_hierarchy() + if 'class' in hierarchy: + hierarchy['class_rank'] = hierarchy.pop('class') + serializer = TaxonomySerializer(taxo, hierarchy) + if serializer.is_valid(): + serializer.save() + else: + _LOGGER.warning(f"Invalid data: {serializer.errors}. Building hierarchy skipped. Data: {serializer.data}") + + +def parse_arguments(): @@ -82,6 +99,7 @@ def run(): taxonomy_names = import_names(args.names) create_taxo_nodes(args.nodes, taxonomy_names) update_taxo_nodes(args.nodes) + build_hierarchy() if __name__ == "__main__":