Commit 7c1d40e3 authored by Kenzo-Hugo Hillion
Browse files

improve eggnog creation

parent eff3e6ce
import logging
from django.core.management.base import BaseCommand
from django.db import IntegrityError
from django.core.exceptions import ValidationError
from metagenedb.apps.catalog.models import EggNog, EggNogFunctionalCategory
from metagenedb.common.utils.chunks import file_len
......@@ -21,6 +21,7 @@ class ImportEggNog(object):
self.updated_count = 0
self.skipped_count = 0
self.skipped_ids = []
self.skipped_errors = []
def _build_functional_category_dict(self):
all_categories = EggNogFunctionalCategory.objects.all()
......@@ -43,16 +44,26 @@ class ImportEggNog(object):
payload = {k: v for k, v in eggnog_dict.items() if v != ""}
try:
eggnog = EggNog(**payload)
eggnog.full_clean()
eggnog.save()
self.created_count += 1
except IntegrityError:
try:
eggnog = EggNog.objects.get(function_id=payload.get('function_id'))
for k, v in payload.items():
setattr(eggnog, k, v)
eggnog.save()
self.updated_count += 1
except IntegrityError:
except ValidationError as validation_error:
if 'function_id' in validation_error.error_dict.keys():
try:
eggnog = EggNog.objects.get(function_id=payload.get('function_id'))
for k, v in payload.items():
setattr(eggnog, k, v)
eggnog.full_clean()
eggnog.save()
self.updated_count += 1
except ValidationError as validation_error:
logger.error(validation_error)
self.skipped_errors.append(validation_error)
self.skipped_ids.append(payload.get('function_id'))
self.skipped_count += 1
else:
logger.error(validation_error)
self.skipped_errors.append(validation_error)
self.skipped_ids.append(payload.get('function_id'))
self.skipped_count += 1
self.processed_count += 1
......
# Generated by Django 3.0 on 2019-12-09 17:02
# Generated by Django 3.0 on 2019-12-09 18:52
from django.db import migrations
from django.db import migrations, models
class Migration(migrations.Migration):
......@@ -14,4 +14,9 @@ class Migration(migrations.Migration):
model_name='eggnog',
name='long_name',
),
migrations.AlterField(
model_name='function',
name='name',
field=models.CharField(blank=True, max_length=500),
),
]
# Generated by Django 3.0 on 2019-12-09 17:06
from django.db import migrations, models
# Django-generated schema migration for the catalog app.
class Migration(migrations.Migration):
# Must be applied after catalog migration 0014_remove_eggnog_long_name.
dependencies = [
('catalog', '0014_remove_eggnog_long_name'),
]
# Single operation: redefine the `name` field of the `function` model
# as a CharField capped at 200 characters.
operations = [
migrations.AlterField(
model_name='function',
name='name',
field=models.CharField(max_length=200),
),
]
......@@ -12,7 +12,7 @@ class Function(models.Model):
]
function_id = models.CharField(max_length=100, db_index=True, unique=True)
name = models.CharField(max_length=200)
name = models.CharField(max_length=500, blank=True)
source = models.CharField(max_length=10, choices=SOURCE_CHOICES, default=UNDEFINED)
def __str__(self):
......
......@@ -15,7 +15,7 @@ class EggNogAnnotationLineParser(object):
return {
'functional_category': elements[2],
'function_id': elements[1],
'name': elements[3].rstrip(),
'name': elements[3].rstrip().split('.')[0],
}
except Exception:
_LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from eggnog annotations.tsv?")
......
......@@ -10,7 +10,8 @@ class TestEggNogAnnotationLineParser(TestCase):
expected_dict = {
'function_id': "28H54",
'name': "translational termination",
'functional_category': "K"
'
functional_category': "K"
}
test_dict = EggNogAnnotationLineParser.get_dict(ko_line)
self.assertDictEqual(test_dict, expected_dict)
......@@ -25,6 +26,16 @@ class TestEggNogAnnotationLineParser(TestCase):
test_dict = EggNogAnnotationLineParser.get_dict(ko_line)
self.assertDictEqual(test_dict, expected_dict)
def test_get_dict_long_name(self):
    """Keep only the text before the first period of a long description."""
    annotation_line = "1\t28H50\tS\tGlucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose. In the absence of glucose, it functions as a transcriptional repressor, whereas high concentrations of glucose cause it to function as a transcriptional activator. In cells growing on low levels of glucose, has a neutral role, neither repressing nor activating transcription (By similarity)\n"  # noqa
    parsed = EggNogAnnotationLineParser.get_dict(annotation_line)
    # The name stops at the first '.', so only the first sentence is expected.
    self.assertDictEqual(parsed, {
        'function_id': "28H50",
        'name': "Glucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose",  # noqa
        'functional_category': "S",
    })
def test_get_dict_wrong_format(self):
ko_line = "This is a wrong line format, with; information and tab"
with self.assertRaises(Exception) as context: # noqa
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment