From e8a384d44b6d9c671020f52fb067ec2978dac301 Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Mon, 28 Oct 2019 16:29:31 +0100
Subject: [PATCH] handle function in bulk for gene and tests

---
 .gitignore                                    |  3 +-
 .../apps/catalog/factory/__init__.py          |  1 +
 .../metagenedb/apps/catalog/factory/gene.py   | 21 ++++++++
 .../apps/catalog/models/__init__.py           |  2 +-
 .../apps/catalog/serializers/gene.py          | 47 +++++++++++++---
 .../catalog/serializers/test_bulk_list.py     | 33 ++++++++++--
 .../apps/catalog/serializers/test_gene.py     | 53 +++++++++++++++++++
 7 files changed, 147 insertions(+), 13 deletions(-)
 create mode 100644 backend/metagenedb/apps/catalog/factory/gene.py
 create mode 100644 backend/metagenedb/apps/catalog/serializers/test_gene.py

diff --git a/.gitignore b/.gitignore
index 96540b9..5437400 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 .DS_Store
 __pycache__/
 *.egg-info/
-.env
+*.env
 .env_dev
 .idea/
 .vscode
@@ -19,3 +19,4 @@ notebooks/
 
 # Tests files
 .coverage
+.pytest_cache
diff --git a/backend/metagenedb/apps/catalog/factory/__init__.py b/backend/metagenedb/apps/catalog/factory/__init__.py
index 0bc6402..2621779 100644
--- a/backend/metagenedb/apps/catalog/factory/__init__.py
+++ b/backend/metagenedb/apps/catalog/factory/__init__.py
@@ -1,2 +1,3 @@
 from .function import FunctionFactory  # noqa
+from .gene import GeneFactory  # noqa
 from .taxonomy import TaxonomyFactory  # noqa
diff --git a/backend/metagenedb/apps/catalog/factory/gene.py b/backend/metagenedb/apps/catalog/factory/gene.py
new file mode 100644
index 0000000..ba0ca0f
--- /dev/null
+++ b/backend/metagenedb/apps/catalog/factory/gene.py
@@ -0,0 +1,21 @@
+from factory import DjangoModelFactory, lazy_attribute
+from faker import Factory
+
+from metagenedb.apps.catalog import models
+
+faker = Factory.create()
+
+SELECTED_SOURCE = [i[0] for i in models.Function.SOURCE_CHOICES]
+
+
+class GeneFactory(DjangoModelFactory):
+    class Meta:
+        model = models.Gene
+
+    @lazy_attribute
+    def gene_id(self):
+        return str(faker.pyint())
+
+    @lazy_attribute
+    def length(self):
+        return str(faker.pyint())
diff --git a/backend/metagenedb/apps/catalog/models/__init__.py b/backend/metagenedb/apps/catalog/models/__init__.py
index 6968989..c80511a 100644
--- a/backend/metagenedb/apps/catalog/models/__init__.py
+++ b/backend/metagenedb/apps/catalog/models/__init__.py
@@ -1,3 +1,3 @@
 from .function import Function, KeggOrthology  # noqa
-from .gene import Gene  # noqa
+from .gene import Gene, GeneFunction  # noqa
 from .taxonomy import Taxonomy  # noqa
diff --git a/backend/metagenedb/apps/catalog/serializers/gene.py b/backend/metagenedb/apps/catalog/serializers/gene.py
index 22e6094..131c47e 100644
--- a/backend/metagenedb/apps/catalog/serializers/gene.py
+++ b/backend/metagenedb/apps/catalog/serializers/gene.py
@@ -3,8 +3,7 @@ import traceback
 
 from rest_framework import serializers
 from rest_framework.utils import model_meta
-from metagenedb.apps.catalog.models import Function, Gene, Taxonomy
-from metagenedb.apps.catalog.serializers import FunctionSerializer
+from metagenedb.apps.catalog.models import Function, Gene, GeneFunction, Taxonomy
 
 from .bulk_list import BulkListSerializer
 
@@ -16,15 +15,51 @@ class GeneListSerializer(BulkListSerializer):
     class Meta:
         model = Gene
 
+    def _generate_gene_function_mapping(self, values, genes):
+        """
+        Generate a list of GeneFunction pairs to create the relation between them
+
+        :param values: each dictionary has the 'functions' and 'gene_id' keys
+        :type values: LIST of DICT
+        :param genes: Gene objects; must include every 'gene_id' found in values
+        :return: unsaved GeneFunction objects, one per gene <-> function pair
+        """
+        genes_dict = {gene.gene_id: gene for gene in genes}
+        mapping = []
+        for value in values:
+            for function in value['functions']:
+                mapping.append(GeneFunction(gene=genes_dict[value['gene_id']],
+                                            function=function))
+        return mapping
+
+    def _handle_functions(self, values):
+        """
+        Replace the existing gene <-> function links of the given genes
+
+        :param values: each dictionary has the 'functions' and 'gene_id' keys
+        :type values: LIST of DICT
+        """
+        # Get all Gene objects
+        gene_ids = [item['gene_id'] for item in values]
+        genes = Gene.objects.filter(gene_id__in=gene_ids)
+        # Get all links with corresponding genes & delete them
+        GeneFunction.objects.filter(gene__in=genes).delete()
+        # Generate table for bulk_create of function <-> gene and create it
+        GeneFunction.objects.bulk_create(
+            self._generate_gene_function_mapping(values, genes)
+        )
+
     def create(self, validated_data):
         instances = super().create(validated_data)
         return instances
 
 
 class GeneSerializer(serializers.ModelSerializer):
-    functions = FunctionSerializer(
+    functions = serializers.SlugRelatedField(
+        queryset=Function.objects.all(),
+        slug_field='function_id',
         many=True,
-        required=False
+        required=False,
     )
     taxonomy = serializers.SlugRelatedField(
         queryset=Taxonomy.objects.all(),
@@ -47,12 +82,12 @@ class GeneSerializer(serializers.ModelSerializer):
     def _handle_functions(self, functions, instance):
         for function in functions:
             try:
-                function = Function.objects.get(function_id=function.get('function_id'))
+                function = Function.objects.get(function_id=function)
                 instance.functions.add(function)
                 instance.full_clean()
                 instance.save()
             except Function.DoesNotExist:
-                _LOGGER.warning(f"{function.get('function_id')} not found for {instance.gene_id}. Function ignored")
+                _LOGGER.warning("%s not found for %s. Function ignored", function, instance.gene_id)
 
     def create(self, validated_data):
         ModelClass = self.Meta.model
diff --git a/backend/metagenedb/apps/catalog/serializers/test_bulk_list.py b/backend/metagenedb/apps/catalog/serializers/test_bulk_list.py
index 10b0ec3..b5e7377 100644
--- a/backend/metagenedb/apps/catalog/serializers/test_bulk_list.py
+++ b/backend/metagenedb/apps/catalog/serializers/test_bulk_list.py
@@ -52,6 +52,29 @@ class TestExtractManyToMany(BaseTestBulkListSerializerMethods):
         self.assertDictEqual(tested_dict, expected_dict)
         self.assertNotEqual(ori_list, self.data)
 
+    def test_extract_many_to_many_two_fields(self):
+        self.info.relations = {
+            'field1': Mock(to_many=True),
+            'field2': Mock(to_many=True)
+        }
+        ori_list = deepcopy(self.data)
+        expected_dict = {
+            'keys': {'field1', 'field2'},
+            'values': {
+                'field1': [
+                    {'field1': 'value1', 'id': 'entry_1'},
+                    {'field1': 'value3', 'id': 'entry_2'}
+                ],
+                'field2': [
+                    {'field2': 'value2', 'id': 'entry_1'},
+                    {'field2': 'value4', 'id': 'entry_2'}
+                ]
+            }
+        }
+        tested_dict = self.bulk_list_serializer._extract_many_to_many(self.data, self.info, 'id')
+        self.assertDictEqual(tested_dict, expected_dict)
+        self.assertNotEqual(ori_list, self.data)
+
     def test_extract_no_many_to_many(self):
         self.info.relations = {
             'field1': Mock(to_many=False),
@@ -67,18 +90,18 @@ class TestExtractManyToMany(BaseTestBulkListSerializerMethods):
         self.assertListEqual(ori_list, self.data)
 
 
-class TestGetDbIndexFields(BaseTestBulkListSerializerMethods):
+class TestGetAllKeyFields(BaseTestBulkListSerializerMethods):
 
-    def test_get_db_index_fields(self):
+    def test_get_all_key_fields(self):
         expected_keys = ['field1', 'field2']
         tested_keys = self.bulk_list_serializer._get_all_key_fields(self.data)
         for key in expected_keys:
             self.assertIn(key, tested_keys)
 
 
-class TestGetKeyFields(BaseTestBulkListSerializerMethods):
+class TestGetDbIndexFields(BaseTestBulkListSerializerMethods):
 
-    def test_get_all_key_fields(self):
+    def test_get_db_index_fields(self):
         self.info.fields = {
             'field1': Mock(db_index=True),
             'field2': Mock(db_index=False)
@@ -87,7 +110,7 @@ class TestGetKeyFields(BaseTestBulkListSerializerMethods):
         tested_keys = self.bulk_list_serializer._get_db_index_fields(self.info)
         self.assertListEqual(tested_keys, expected_keys)
 
-    def test_get_all_key_fields_no_keys(self):
+    def test_get_db_index_fields_no_keys(self):
         self.info.fields = {
             'field1': Mock(db_index=False),
             'field2': Mock(db_index=False)
diff --git a/backend/metagenedb/apps/catalog/serializers/test_gene.py b/backend/metagenedb/apps/catalog/serializers/test_gene.py
new file mode 100644
index 0000000..10e99b8
--- /dev/null
+++ b/backend/metagenedb/apps/catalog/serializers/test_gene.py
@@ -0,0 +1,53 @@
+from unittest import TestCase
+
+from rest_framework.test import APITestCase
+
+from metagenedb.apps.catalog.factory import FunctionFactory, GeneFactory
+from metagenedb.apps.catalog.models import GeneFunction
+from metagenedb.apps.catalog.serializers.gene import GeneListSerializer
+
+
+class GeneListSerializerTest(GeneListSerializer):
+    """
+    overload to skip __init__() and just test the gene <-> function methods.
+    """
+
+    def __init__(self):
+        pass
+
+
+class TestGenerateGeneFunctionMapping(TestCase):
+
+    def test_generate_gene_function_mapping(self):
+        genes = [GeneFactory.build(gene_id='gene_1')]
+        values = [{
+            'gene_id': 'gene_1',
+            'functions': [FunctionFactory.build()]
+        }]
+        expected_object = GeneFunction(gene=genes[0], function=values[0]['functions'][0])
+        serializer = GeneListSerializerTest()
+        tested_object = serializer._generate_gene_function_mapping(values, genes)[0]
+        self.assertEqual(tested_object.gene, expected_object.gene)
+        self.assertEqual(tested_object.function, expected_object.function)
+
+
+class TestHandleFunctions(APITestCase):
+
+    def test_handle_functions(self):
+        """
+        @TODO function in the gene object is not found but the link is created
+        """
+        gene = GeneFactory.create()
+        function = FunctionFactory.create()
+        values = [{
+            'gene_id': gene.gene_id,
+            'functions': [function]
+        }]
+        self.assertNotEqual(gene.functions, [function])
+        self.assertEqual(GeneFunction.objects.all().count(), 0)
+        serializer = GeneListSerializerTest()
+        serializer._handle_functions(values)
+        link = GeneFunction.objects.all()
+        self.assertEqual(link.count(), 1)
+        self.assertEqual(link[0].gene.gene_id, gene.gene_id)
+        self.assertEqual(link[0].function.function_id, function.function_id)
-- 
GitLab