Commit 0f854f02 authored by Kenzo-Hugo Hillion
Browse files

Replace custom insertion_model by serializers

parent a569de25
......@@ -999,3 +999,4 @@
999 158499257-stool1_revised_C1458534_1_gene127873 11955 Complete USA unknown unknown unknown NOG295308 0.00315706393054459 0.00280373831775701 unknown unknown USA
1000 MH0385_GL0059251 11946 Lack both ends EUR unknown unknown unknown unknown 0.000789265982636148 0.000934579439252336 unknown unknown EUR
1000 MH0385_GL0059251 11946 Lack both ends EUR unknown unknown unknown unknown 0.000789265982636148 0.000934579439252336 unknown unknown EUR
353535 wrong_length the_length Info EUR unknown unknown unknown unknown 0.0000001 0.00000001 0.0000001 unknown unknown EUR
......@@ -7,7 +7,7 @@ import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('catalog', '0003_auto_20190717_1551'),
('catalog', '0003_complete_taxonomy'),
]
operations = [
......
# Generated by Django 2.2.1 on 2019-08-01 14:16
from django.db import migrations
class Migration(migrations.Migration):
    """Set the default ordering of the ``gene`` model to ``-gene_id``."""

    # Must be applied after the superkingdom taxonomy migration of the same app.
    dependencies = [
        ('catalog', '0004_taxonomy_superkingdom'),
    ]

    # Only model options change here (Meta.ordering).
    operations = [
        migrations.AlterModelOptions(
            name='gene',
            options=dict(ordering=['-gene_id']),
        ),
    ]
......@@ -10,3 +10,6 @@ class Gene(models.Model):
def __str__(self):
return self.gene_id
class Meta:
ordering = ['-gene_id']
......@@ -9,30 +9,13 @@ from rest_framework.decorators import (
from rest_framework.response import Response
from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
from metagenedb.apps.catalog.models import Function, Gene
from metagenedb.apps.catalog.models import Gene
from metagenedb.apps.catalog.serializers import GeneSerializer
from metagenedb.apps.catalog.views.insertion_model import InsertionBase
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
class GeneInsertion(InsertionBase):
    """Insertion helper for ``Gene`` objects, looked up by their ``gene_id``.

    ``kegg_ko`` is declared as a many-to-many field; its value is handled by
    ``_link_kegg_ko``.
    """

    MANY_TO_MANY_FIELDS = ['kegg_ko']
    model = Gene
    obj_id = "gene_id"

    def _link_kegg_ko(self, function_id):
        """Attach the ``Function`` matching *function_id* to the current object.

        The placeholder value ``'unknown'`` is skipped. When no ``Function``
        with that id exists, a warning is logged and nothing is linked.
        """
        placeholders = ['unknown']
        if function_id in placeholders:
            return
        try:
            matched = Function.objects.get(function_id=function_id)
            self.obj.functions.add(matched)
            self.full_clean_and_save()
        except Function.DoesNotExist:
            _LOGGER.warning(f"{function_id} not found in the database. Full dict: {self.full_dict}.")
@api_view(['GET'])
@authentication_classes(())
@permission_classes(())
......
from abc import ABC
from metagenedb.utils.dict_operations import extract_dict
class InsertionBase(ABC):
    """
    Shared machinery to create or update one model instance from a plain dict.

    Meant for POST methods as well as for scripts inserting directly into the DB.
    Subclasses supply ``model`` and ``obj_id`` and one ``_link_<field>`` method for
    every key listed in ``FOREIGN_KEY_FIELDS`` or ``MANY_TO_MANY_FIELDS``.
    """

    MANY_TO_MANY_FIELDS = []
    FOREIGN_KEY_FIELDS = []
    SIMPLE_FIELDS = []  # Fields you want to be able to create with the class

    @property
    def model(self):
        """Model class to insert into; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def obj_id(self):
        """Name of the field used to look up an existing object; must be overridden."""
        raise NotImplementedError

    def __init__(self, model_dict):
        """Split *model_dict* into simple, foreign-key and many-to-many parts."""
        # Snapshot first: extract_dict is defined elsewhere and may modify
        # model_dict in place (TODO confirm), so the call order below is kept as is.
        self.full_dict = model_dict.copy()
        self.foreign_key_dict = extract_dict(model_dict, self.FOREIGN_KEY_FIELDS)
        self.many_to_many_dict = extract_dict(model_dict, self.MANY_TO_MANY_FIELDS)
        self.simple_dict = (
            extract_dict(model_dict, self.SIMPLE_FIELDS)
            if self.SIMPLE_FIELDS
            else model_dict.copy()
        )
        self.obj = None

    def upsert_to_db(self):
        """Update the object matching ``obj_id`` or create it, then link related fields."""
        lookup = {self.obj_id: self.full_dict.get(self.obj_id)}
        try:
            self.obj = self.model.objects.get(**lookup)
            for field_name, new_value in self.simple_dict.items():
                setattr(self.obj, field_name, new_value)
        except self.model.DoesNotExist:
            self.create_obj()
        self.full_clean_and_save()
        self.handle_foreign_fields()
        self.handle_many_to_many_fields()

    def create_obj(self):
        """Instantiate (without saving) a new model object from the simple fields."""
        self.obj = self.model(**self.simple_dict)

    def full_clean_and_save(self):
        """Validate the current object with ``full_clean`` and save it."""
        self.obj.full_clean()
        self.obj.save()

    def _dispatch_links(self, field_values):
        """Call ``self._link_<key>(value)`` for every item of *field_values*."""
        for field_name, raw_value in field_values.items():
            linker = getattr(self, f"_link_{field_name}")
            linker(raw_value)

    def handle_foreign_fields(self):
        """Run the ``_link_<key>`` method of each extracted foreign-key field."""
        self._dispatch_links(self.foreign_key_dict)

    def handle_many_to_many_fields(self):
        """Run the ``_link_<key>`` method of each extracted many-to-many field."""
        self._dispatch_links(self.many_to_many_dict)
......@@ -6,14 +6,14 @@ import sys
from itertools import islice
import django
from django.core.exceptions import ValidationError
from rest_framework.exceptions import ValidationError
# Before importing models, we need to call django.setup() to load apps
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()
from metagenedb.apps.catalog.models import Gene, Function # noqa
from metagenedb.apps.catalog.views.gene import GeneInsertion # noqa
from metagenedb.apps.catalog.serializers import GeneSerializer # noqa
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
......@@ -46,13 +46,21 @@ def parse_gene(raw_line):
}
def upsert_gene(gene_dict):
    """Create or update a ``Gene`` from *gene_dict* through ``GeneSerializer``.

    An existing gene is looked up by ``gene_dict['gene_id']``; when found, the
    serializer is bound to it so that saving updates it, otherwise saving
    creates a new gene. Validation is run with ``raise_exception=True``, so
    invalid data raises instead of being saved.
    """
    try:
        existing_gene = Gene.objects.get(gene_id=gene_dict.get('gene_id'))
    except Gene.DoesNotExist:
        serializer = GeneSerializer(data=gene_dict)
    else:
        serializer = GeneSerializer(existing_gene, data=gene_dict)
    serializer.is_valid(raise_exception=True)
    serializer.save()
def insert_gene_list(chunk_genes):
    """Parse every raw line of *chunk_genes* and upsert the resulting gene.

    Each line is parsed once and written once through ``upsert_gene``. A gene
    whose data fails validation is logged as a warning and skipped, so one bad
    line does not abort the rest of the chunk.
    """
    for gene_line in chunk_genes:
        # Parse outside the try so gene_dict is always bound in the handler below.
        gene_dict = parse_gene(gene_line)
        try:
            upsert_gene(gene_dict)
        except ValidationError as e:
            _LOGGER.warning(f"{e.__dict__} for gene_id: {gene_dict.get('gene_id')}. Insertion skipped.")
......
from unittest import TestCase
from rest_framework.exceptions import ValidationError
from rest_framework.test import APITestCase
from metagenedb.apps.catalog.models import Gene
from scripts.populate_db.import_igc_data import parse_gene, upsert_gene
class TestParseGene(TestCase):
    """Check which columns ``parse_gene`` keeps from a tab-separated line."""

    def test_parse_gene(self):
        # Every column carries its own name as value, so the expected dict
        # shows directly which input column feeds which output key.
        columns = (
            'gene_id',
            'gene_name',
            'gene_length',
            'gene_completeness_status',
            'cohort_origin',
            'taxo_phylum',
            'taxo_genus',
            'kegg',
            'eggnog',
            'sample_occurence_freq',
            'ind_occurence_freq',
            'kegg_functional_cat',
            'eggnog_functional_cat',
            'cohort_assembled',
        )
        line = "\t".join(columns)
        expected = {
            'gene_id': 'gene_name',  # We use the gene name for our gene ID
            'gene_length': 'gene_length',
            'kegg_ko': 'kegg',
        }
        self.assertDictEqual(parse_gene(line), expected)
class TestUpsertGene(APITestCase):
    """Checks for ``upsert_gene``: creation, rejection of invalid data, and update."""

    def test_insert_valid_gene_no_kegg(self):
        upsert_gene({'gene_id': 'test_gene01', 'gene_length': 3556})
        self.assertEqual(Gene.objects.all().count(), 1)

    def test_insert_invalid_gene_length(self):
        bad_payload = {'gene_id': 'test_gene01', 'gene_length': 'wrong_format'}
        with self.assertRaises(ValidationError):
            upsert_gene(bad_payload)

    def test_update_gene(self):
        # First pass inserts the gene, second pass updates its length in place.
        for length in (3556, 356):
            upsert_gene({'gene_id': 'test_gene01', 'gene_length': length})
            stored = Gene.objects.get(gene_id="test_gene01")
            self.assertEqual(stored.gene_length, length)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment