Skip to content
Snippets Groups Projects
Commit e679c49d authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion :recycle:
Browse files

Add building of hierarchy from direct parent

parent 45c841f0
No related branches found
No related tags found
1 merge request: !3 "Integrate taxonomy to database"
Pipeline #13223 passed with stage
in 1 minute and 21 seconds
...@@ -6,5 +6,8 @@ from metagenedb.apps.catalog.models import Taxonomy ...@@ -6,5 +6,8 @@ from metagenedb.apps.catalog.models import Taxonomy
@admin.register(Taxonomy) @admin.register(Taxonomy)
class TaxonomyAdmin(admin.ModelAdmin): class TaxonomyAdmin(admin.ModelAdmin):
list_display = ('tax_id', 'name', 'rank', 'parent') list_display = (
'tax_id', 'name', 'rank',
'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species',
)
search_fields = ('tax_id', 'name') search_fields = ('tax_id', 'name')
# Generated by Django 2.2.1 on 2019-07-17 13:51
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add per-rank self-referencing foreign keys to the ``taxonomy`` model.

    Seven nullable ``ForeignKey`` fields pointing at ``catalog.Taxonomy`` are
    added (one per rank), and the existing ``parent`` field is altered so that
    it is declared with ``related_name='direct_children'``.
    """

    dependencies = [('catalog', '0002_taxonomy')]

    operations = [
        # ``class`` is a reserved word in Python, so the field is called
        # ``class_rank`` and carries an explicit verbose name.
        migrations.AddField(
            model_name='taxonomy',
            name='class_rank',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='class_children',
                verbose_name='class',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='family',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='family_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='genus',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='genus_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='kingdom',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='kingdom_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='order',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='order_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='phylum',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='phylum_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='taxonomy',
            name='species',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='species_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        # The direct-parent link already exists; only its declaration changes.
        migrations.AlterField(
            model_name='taxonomy',
            name='parent',
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='direct_children',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]
...@@ -47,8 +47,44 @@ class Taxonomy(models.Model): ...@@ -47,8 +47,44 @@ class Taxonomy(models.Model):
name = models.CharField(max_length=200, default=NAME_DEFAULT) name = models.CharField(max_length=200, default=NAME_DEFAULT)
rank = models.CharField(max_length=20, choices=RANK_CHOICES) rank = models.CharField(max_length=20, choices=RANK_CHOICES)
parent = models.ForeignKey( parent = models.ForeignKey(
'Taxonomy', 'Taxonomy', related_name='direct_children',
related_name='children', on_delete=models.SET_NULL,
null=True, blank=True,
)
# One nullable self-referencing link per taxonomic rank. Every reverse
# accessor follows the ``<rank>_children`` naming pattern.
kingdom = models.ForeignKey(
    'Taxonomy', related_name='kingdom_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
)
phylum = models.ForeignKey(
    'Taxonomy', related_name='phylum_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
)
# ``class`` is a reserved word in Python, hence the ``class_rank`` attribute
# name; ``verbose_name`` keeps the displayed label as "class".
class_rank = models.ForeignKey(
    'Taxonomy', related_name='class_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
    verbose_name="class"
)
order = models.ForeignKey(
    'Taxonomy', related_name='order_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
)
# Fixed: was 'familyphy_children', which broke the naming pattern and did not
# match the ``family_children`` value recorded in the schema migration.
family = models.ForeignKey(
    'Taxonomy', related_name='family_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
)
genus = models.ForeignKey(
    'Taxonomy', related_name='genus_children',
    on_delete=models.SET_NULL,
    null=True, blank=True,
)
species = models.ForeignKey(
'Taxonomy', related_name='species_children',
on_delete=models.SET_NULL, on_delete=models.SET_NULL,
null=True, blank=True, null=True, blank=True,
) )
...@@ -56,5 +92,13 @@ class Taxonomy(models.Model): ...@@ -56,5 +92,13 @@ class Taxonomy(models.Model):
def __str__(self): def __str__(self):
return f"{self.name}" return f"{self.name}"
def build_parental_hierarchy(self):
    """
    Collect the lineage of this entry by following ``parent`` links upwards.

    Starting from this entry, every visited entry contributes one
    ``rank -> tax_id`` pair. The walk stops, without recording the entry it
    stops on, when that entry is named ``'root'`` or has no parent. When two
    visited entries share the same rank, the one further up the chain wins.

    The walk also stops when a parent link leads back to an entry that was
    already visited (for example an entry that is its own parent), so a
    cyclic chain terminates instead of recursing without end.

    Returns:
        dict: the collected ``rank -> tax_id`` pairs plus a ``'tax_id'`` key
        holding the ``tax_id`` of this entry.
    """
    hierarchy = {}
    visited = set()
    node = self
    while node.name != 'root' and node.parent is not None:
        # Compare by tax_id rather than identity: following a relation may
        # hand back a distinct object for the same entry.
        if node.tax_id in visited:
            break
        visited.add(node.tax_id)
        # Later (higher) entries overwrite earlier ones with the same rank.
        hierarchy[node.rank] = node.tax_id
        node = node.parent
    # Written last so that no rank entry can shadow the entry's own id.
    hierarchy['tax_id'] = self.tax_id
    return hierarchy
class Meta: class Meta:
verbose_name_plural = "Taxonomy" verbose_name_plural = "Taxonomy"
from unittest import TestCase
from .taxonomy import Taxonomy
class TestBuildHierarchy(TestCase):
    """Exercise ``Taxonomy.build_parental_hierarchy`` on a three-level chain."""

    @classmethod
    def setUpClass(cls):
        """
        Create a root -> kingdom -> phylum chain shared by the tests.

        The instances are only constructed in memory; nothing is saved here.
        """
        root = Taxonomy(tax_id="1", name="root", rank="no_rank")
        kingdom = Taxonomy(tax_id="2", name="KINGDOM", rank="kingdom", parent=root)
        phylum = Taxonomy(tax_id="3", name="PHYLUM", rank="phylum", parent=kingdom)
        cls.root = root
        cls.kingdom = kingdom
        cls.phylum = phylum

    def test_build_hierarchy(self):
        """The phylum reports itself and its kingdom; the root is left out."""
        observed = self.phylum.build_parental_hierarchy()
        expected = {
            'tax_id': '3',
            'phylum': '3',
            'kingdom': '2'
        }
        self.assertDictEqual(observed, expected)
...@@ -3,13 +3,52 @@ from metagenedb.apps.catalog.models import Taxonomy ...@@ -3,13 +3,52 @@ from metagenedb.apps.catalog.models import Taxonomy
class TaxonomySerializer(serializers.ModelSerializer): class TaxonomySerializer(serializers.ModelSerializer):
rank = serializers.CharField(required=False)
parent_tax_id = serializers.SlugRelatedField( parent_tax_id = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(), queryset=Taxonomy.objects.all(),
slug_field='tax_id', slug_field='tax_id',
source='parent', source='parent',
required=False,
)
kingdom = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
phylum = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
class_rank = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
order = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
family = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
genus = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False
)
species = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
required=False required=False
) )
class Meta: class Meta:
model = Taxonomy model = Taxonomy
fields = ('tax_id', 'name', 'rank', 'parent_tax_id') fields = (
'tax_id', 'name', 'rank', 'parent_tax_id',
'kingdom', 'phylum', 'class_rank', 'order', 'family', 'genus', 'species',
)
...@@ -50,7 +50,7 @@ def create_taxo_nodes(taxonomy_nodes_file, taxo_name_dict): ...@@ -50,7 +50,7 @@ def create_taxo_nodes(taxonomy_nodes_file, taxo_name_dict):
def update_taxo_nodes(taxonomy_nodes_file): def update_taxo_nodes(taxonomy_nodes_file):
_LOGGER.info(f"Linking taxonomy objects to parental nodes from {taxonomy_nodes_file}...") _LOGGER.info(f"Linking taxonomy objects to direct parental node from {taxonomy_nodes_file}...")
with open(taxonomy_nodes_file, "r") as file: with open(taxonomy_nodes_file, "r") as file:
for i in file: for i in file:
node = NCBITaxonomyLineParser.node(i) node = NCBITaxonomyLineParser.node(i)
...@@ -59,7 +59,24 @@ def update_taxo_nodes(taxonomy_nodes_file): ...@@ -59,7 +59,24 @@ def update_taxo_nodes(taxonomy_nodes_file):
if serializer.is_valid(): if serializer.is_valid():
serializer.save() serializer.save()
else: else:
_LOGGER.warning(f"Invalid data: {serializer.errors}. Insertion skipped. Data: {serializer.data}") _LOGGER.warning(f"Invalid data: {serializer.errors}. Link to parent skipped. Data: {serializer.data}")
def build_hierarchy():
    """
    Fill the per-rank fields of every Taxonomy entry from its parent chain.

    For each entry, ``build_parental_hierarchy`` supplies a dict keyed by
    rank. The ``'class'`` key is renamed to ``'class_rank'`` before the dict
    is passed to ``TaxonomySerializer`` together with the entry. Valid data
    is saved; invalid data is logged as a warning and the entry is skipped.
    """
    _LOGGER.info("Linking taxonomy objects to parental nodes from direct parental nodes...")
    for taxo in Taxonomy.objects.all():
        lineage = taxo.build_parental_hierarchy()
        # The serializer field is called ``class_rank``, not ``class``.
        if 'class' in lineage:
            lineage['class_rank'] = lineage.pop('class')
        serializer = TaxonomySerializer(taxo, lineage)
        if not serializer.is_valid():
            _LOGGER.warning(f"Invalid data: {serializer.errors}. Building hierarchy skipped. Data: {serializer.data}")
            continue
        serializer.save()
def parse_arguments(): def parse_arguments():
...@@ -82,6 +99,7 @@ def run(): ...@@ -82,6 +99,7 @@ def run():
taxonomy_names = import_names(args.names) taxonomy_names = import_names(args.names)
create_taxo_nodes(args.nodes, taxonomy_names) create_taxo_nodes(args.nodes, taxonomy_names)
update_taxo_nodes(args.nodes) update_taxo_nodes(args.nodes)
build_hierarchy()
if __name__ == "__main__": if __name__ == "__main__":
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment