Commit b70d190e authored by Kenzo-Hugo Hillion
Browse files

Use API for genes and update script

parent 2e85bc4f
......@@ -3,6 +3,8 @@ from metagenedb.apps.catalog.models import Function
class FunctionSerializer(serializers.ModelSerializer):
    """Serialize a `Function` as its `function_id`, `source` and `name`."""

    # `name` is declared explicitly so that it may be omitted from the payload.
    name = serializers.CharField(required=False)

    class Meta:
        model = Function
        fields = (
            'function_id',
            'source',
            'name',
        )
import logging
import traceback
from rest_framework import serializers
from metagenedb.apps.catalog.models import Gene, Taxonomy
from rest_framework.utils import model_meta
from metagenedb.apps.catalog.models import Function, Gene, Taxonomy
from metagenedb.apps.catalog.serializers import FunctionSerializer
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
class GeneSerializer(serializers.ModelSerializer):
functions = FunctionSerializer(many=True, required=False)
functions = FunctionSerializer(
many=True,
required=False
)
taxonomy = serializers.SlugRelatedField(
queryset=Taxonomy.objects.all(),
slug_field='tax_id',
......@@ -14,3 +24,55 @@ class GeneSerializer(serializers.ModelSerializer):
class Meta:
model = Gene
fields = ('gene_id', 'gene_name', 'length', 'functions', 'taxonomy')
def _handle_functions(self, functions, instance):
    """
    Link already-existing `Function` rows to `instance`.

    `functions` is an iterable of mappings, each read through its
    'function_id' key. Entries whose id matches no `Function` are skipped
    with a warning; for every match the function is added to
    `instance.functions`, then the instance is validated and saved.
    """
    for function_data in functions:
        function_id = function_data.get('function_id')
        try:
            matched_function = Function.objects.get(function_id=function_id)
            instance.functions.add(matched_function)
            instance.full_clean()
            instance.save()
        except Function.DoesNotExist:
            _LOGGER.warning(f"{function_id} not found for {instance.gene_id}. Function ignored")
def create(self, validated_data):
    """
    Create the model instance, then attach its many-to-many relations.

    To-many relations found in `validated_data` are removed before the
    instance is created and applied afterwards. For each relation, a
    `_handle_<field_name>(value, instance)` method on this serializer is
    used when one exists; otherwise the values are assigned through the
    related manager's `set()`.

    Returns the created instance. Raises `TypeError` (with an explanatory
    message embedding the original traceback) when the manager's
    `create()` rejects the remaining arguments.
    """
    ModelClass = self.Meta.model

    # Remove many-to-many relationships from validated_data.
    # They are not valid arguments to the default `.create()` method,
    # as they require that the instance has already been saved.
    info = model_meta.get_field_info(ModelClass)
    many_to_many = {}
    for field_name, relation_info in info.relations.items():
        if relation_info.to_many and (field_name in validated_data):
            many_to_many[field_name] = validated_data.pop(field_name)

    try:
        instance = ModelClass._default_manager.create(**validated_data)
    except TypeError as exc:
        tb = traceback.format_exc()
        msg = (
            'Got a `TypeError` when calling `%s.%s.create()`. '
            'This may be because you have a writable field on the '
            'serializer class that is not a valid argument to '
            '`%s.%s.create()`. You may need to make the field '
            'read-only, or override the %s.create() method to handle '
            'this correctly.\nOriginal exception was:\n %s' %
            (
                ModelClass.__name__,
                ModelClass._default_manager.name,
                ModelClass.__name__,
                ModelClass._default_manager.name,
                self.__class__.__name__,
                tb
            )
        )
        raise TypeError(msg) from exc

    # Save many-to-many relationships after the instance is created.
    for field_name, value in many_to_many.items():
        handler = getattr(self, f'_handle_{field_name}', None)
        if handler is not None:
            handler(value, instance)
        else:
            # No dedicated handler: assign through the related manager
            # instead of calling `None`, which raised an opaque TypeError.
            getattr(instance, field_name).set(value)

    return instance
......@@ -7,7 +7,6 @@ from itertools import islice
from requests.exceptions import HTTPError
import django
from rest_framework.exceptions import ValidationError
from slugify import slugify
from metagenedb.common.utils.api import MetageneDBCatalogGeneAPI
......@@ -17,8 +16,7 @@ from metagenedb.common.utils.parsers import IGCLineParser
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()
from metagenedb.apps.catalog.models import Gene, Function, Taxonomy # noqa
from metagenedb.apps.catalog.serializers import GeneSerializer # noqa
from metagenedb.apps.catalog.models import Taxonomy # noqa
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
......@@ -74,8 +72,10 @@ class ImportIGCGenes(object):
self.skip_functions = skip_functions
def _clean_gene(self, gene_dict):
print(gene_dict)
gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = [{'function_id': gene_dict.pop('kegg_ko')}]
if self.skip_tax:
gene_dict.pop('taxonomy')
if self.skip_functions:
......@@ -86,7 +86,7 @@ class ImportIGCGenes(object):
clean_gene_dict = self._clean_gene(gene_dict)
try:
gene_id = clean_gene_dict['gene_id']
gene_obj = self.metagenedb_gene_api.get(gene_id) # Try to get obj to check if it exists
self.metagenedb_gene_api.get(gene_id) # Try to get obj to check if it exists
self.metagenedb_gene_api.put(gene_id, clean_gene_dict)
except HTTPError:
self.metagenedb_gene_api.post(clean_gene_dict)
......
......@@ -103,23 +103,26 @@ class MetageneDBCatalogGeneAPIMock(MetageneDBCatalogGeneAPI):
class TestUpsertGene(APITestCase):
def setUp(self):
self.import_igc_genes = ImportIGCGenes('test', 'test')
self.import_igc_genes = ImportIGCGenes('test', 'test', skip_functions=True)
self.api_mock = MetageneDBCatalogGeneAPIMock(self.client)
self.import_igc_genes.metagenedb_gene_api = self.api_mock
def test_insert_valid_gene_no_kegg(self):
valid_gene = {
'gene_name': 'test_gene.01',
'gene_id': 'test-gene01',
'length': 3556
'gene_id': 'test-gene-01',
'length': 3556,
'kegg_ko': 'K00001'
}
self.import_igc_genes._upsert_gene(valid_gene)
self.assertEqual(self.api_mock.get_all()['count'], 1)
def test_insert_invalid_length(self):
invalid_gene = {
'gene_id': 'test-gene01',
'length': 'wrong_format'
'gene_name': 'test_gene.01',
'gene_id': 'test-gene-01',
'length': 'wrong_format',
'kegg_ko': 'K00001'
}
with self.assertRaises(HTTPError) as context: # noqa
self.import_igc_genes._upsert_gene(invalid_gene)
......@@ -127,18 +130,20 @@ class TestUpsertGene(APITestCase):
def test_update_gene(self):
valid_gene = {
'gene_name': 'test_gene.01',
'gene_id': 'test-gene01',
'length': 3556
'gene_id': 'test-gene-01',
'length': 3556,
'kegg_ko': 'K00001'
}
updated_gene = {
'gene_name': 'test_gene.01',
'gene_id': 'test-gene01',
'length': 356
'gene_id': 'test-gene-01',
'length': 356,
'kegg_ko': 'K00001'
}
self.import_igc_genes._upsert_gene(valid_gene)
self.assertEqual(self.api_mock.get('test-gene01')['length'], 3556)
self.assertEqual(self.api_mock.get('test-gene-01')['length'], 3556)
self.import_igc_genes._upsert_gene(updated_gene)
self.assertEqual(self.api_mock.get('test-gene01')['length'], 356)
self.assertEqual(self.api_mock.get('test-gene-01')['length'], 356)
class TestCleanGene(TestCase):
......@@ -149,7 +154,7 @@ class TestCleanGene(TestCase):
'gene_id': 'gene.01',
'length': 135,
'taxonomy': 'Taxo',
'kegg_ko': 'Genus'
'kegg_ko': 'K00001'
}
def test_clean_gene(self):
......@@ -158,7 +163,7 @@ class TestCleanGene(TestCase):
'gene_name': 'gene.01',
'length': 135,
'taxonomy': 'Taxo',
'kegg_ko': 'Genus'
'functions': [{'function_id': 'K00001'}]
}
test_gene_dict = self.import_igc_genes._clean_gene(self.gene_dict)
self.assertDictEqual(test_gene_dict, expected_gene_dict)
......@@ -169,7 +174,18 @@ class TestCleanGene(TestCase):
'gene_id': 'gene-01',
'gene_name': 'gene.01',
'length': 135,
'kegg_ko': 'Genus'
'functions': [{'function_id': 'K00001'}]
}
test_gene_dict = self.import_igc_genes._clean_gene(self.gene_dict)
self.assertDictEqual(test_gene_dict, expected_gene_dict)
def test_clean_gene_skip_functions(self):
    """With `skip_functions` enabled, the cleaned gene has no 'functions' key."""
    self.import_igc_genes.skip_functions = True
    cleaned = self.import_igc_genes._clean_gene(self.gene_dict)
    expected = dict(
        gene_id='gene-01',
        gene_name='gene.01',
        length=135,
        taxonomy='Taxo',
    )
    self.assertDictEqual(cleaned, expected)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment