diff --git a/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py b/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py index 56f5f7a1d34bfeb4869fa29fea53f3d5cf002481..75a4d902d16b0a0b673613b5485c5481fb752c59 100644 --- a/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py +++ b/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py @@ -1,7 +1,7 @@ import logging from django.core.management.base import BaseCommand -from django.db import IntegrityError +from django.core.exceptions import ValidationError from metagenedb.apps.catalog.models import EggNog, EggNogFunctionalCategory from metagenedb.common.utils.chunks import file_len @@ -21,6 +21,7 @@ class ImportEggNog(object): self.updated_count = 0 self.skipped_count = 0 self.skipped_ids = [] + self.skipped_errors = [] def _build_functional_category_dict(self): all_categories = EggNogFunctionalCategory.objects.all() @@ -43,16 +44,26 @@ class ImportEggNog(object): payload = {k: v for k, v in eggnog_dict.items() if v != ""} try: eggnog = EggNog(**payload) + eggnog.full_clean() eggnog.save() self.created_count += 1 - except IntegrityError: - try: - eggnog = EggNog.objects.get(function_id=payload.get('function_id')) - for k, v in payload.items(): - setattr(eggnog, k, v) - eggnog.save() - self.updated_count += 1 - except IntegrityError: + except ValidationError as validation_error: + if 'function_id' in validation_error.error_dict.keys(): + try: + eggnog = EggNog.objects.get(function_id=payload.get('function_id')) + for k, v in payload.items(): + setattr(eggnog, k, v) + eggnog.full_clean() + eggnog.save() + self.updated_count += 1 + except ValidationError as validation_error: + logger.error(validation_error) + self.skipped_errors.append(validation_error) + self.skipped_ids.append(payload.get('function_id')) + self.skipped_count += 1 + else: + logger.error(validation_error) + self.skipped_errors.append(validation_error) self.skipped_ids.append(payload.get('function_id')) self.skipped_count += 1 self.processed_count += 1 diff --git a/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py b/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py new file mode 100644 index 0000000000000000000000000000000000000000..5a39f54cce132270c35229ca545cb588708dcc42 --- /dev/null +++ b/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py @@ -0,0 +1,22 @@ +# Generated by Django 3.0 on 2019-12-09 18:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('catalog', '0013_plural_eggnog'), + ] + + operations = [ + migrations.RemoveField( + model_name='eggnog', + name='long_name', + ), + migrations.AlterField( + model_name='function', + name='name', + field=models.CharField(blank=True, max_length=500), + ), + ] diff --git a/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py b/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py deleted file mode 100644 index f2b03bf40e956740f4fe62495e09800c25e6e25d..0000000000000000000000000000000000000000 --- a/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 3.0 on 2019-12-09 17:02 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ('catalog', '0013_plural_eggnog'), - ] - - operations = [ - migrations.RemoveField( - model_name='eggnog', - name='long_name', - ), - ] diff --git a/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py b/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py deleted file mode 100644 index 5627b6c2b474d827d82b4664df1ddf080a16a835..0000000000000000000000000000000000000000 --- a/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.0 on 2019-12-09 17:06 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('catalog', '0014_remove_eggnog_long_name'), - ] - - operations = [ - migrations.AlterField( - model_name='function', - name='name', - field=models.CharField(max_length=200), - ), - ] diff --git a/backend/metagenedb/apps/catalog/models/function.py b/backend/metagenedb/apps/catalog/models/function.py index 2723416c8114ada5f61138ac811d92df9d27ff0e..cf27a27a0459f81bb5414054be32e1e10baa6bfb 100644 --- a/backend/metagenedb/apps/catalog/models/function.py +++ b/backend/metagenedb/apps/catalog/models/function.py @@ -12,7 +12,7 @@ class Function(models.Model): ] function_id = models.CharField(max_length=100, db_index=True, unique=True) - name = models.CharField(max_length=200) + name = models.CharField(max_length=500, blank=True) source = models.CharField(max_length=10, choices=SOURCE_CHOICES, default=UNDEFINED) def __str__(self): diff --git a/backend/metagenedb/common/utils/parsers/eggnog.py b/backend/metagenedb/common/utils/parsers/eggnog.py index 70b8aeb5993df653e16533e47d678e40068879ca..a24485f9b9f31420584c2a5ae72b7e0d426e4a66 100644 --- a/backend/metagenedb/common/utils/parsers/eggnog.py +++ b/backend/metagenedb/common/utils/parsers/eggnog.py @@ -15,7 +15,7 @@ class EggNogAnnotationLineParser(object): return { 'functional_category': elements[2], 'function_id': elements[1], - 'name': elements[3].rstrip(), + 'name': elements[3].rstrip().split('.')[0], } except Exception: _LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from eggnog annotations.tsv?") diff --git a/backend/metagenedb/common/utils/parsers/test_eggnog.py b/backend/metagenedb/common/utils/parsers/test_eggnog.py index ab22354d8ad18bde9420114c9b8fc1ccf9076245..3dce9a66ebe5fa9bf6eee273a239d13bf9e8da5f 100644 --- a/backend/metagenedb/common/utils/parsers/test_eggnog.py +++ b/backend/metagenedb/common/utils/parsers/test_eggnog.py @@ -10,7 +10,8 @@ class TestEggNogAnnotationLineParser(TestCase): expected_dict = { 'function_id': "28H54", 'name': "translational termination", - 'functional_category': "K" + ' + functional_category': "K" } test_dict = EggNogAnnotationLineParser.get_dict(ko_line) self.assertDictEqual(test_dict, expected_dict) @@ -25,6 +26,16 @@ class TestEggNogAnnotationLineParser(TestCase): test_dict = EggNogAnnotationLineParser.get_dict(ko_line) self.assertDictEqual(test_dict, expected_dict) + def test_get_dict_long_name(self): + ko_line = "1\t28H50\tS\tGlucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose. In the absence of glucose, it functions as a transcriptional repressor, whereas high concentrations of glucose cause it to function as a transcriptional activator. In cells growing on low levels of glucose, has a neutral role, neither repressing nor activating transcription (By similarity)\n" # noqa + expected_dict = { + 'function_id': "28H50", + 'name': "Glucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose", # noqa + 'functional_category': "S" + } + test_dict = EggNogAnnotationLineParser.get_dict(ko_line) + self.assertDictEqual(test_dict, expected_dict) + def test_get_dict_wrong_format(self): ko_line = "This is a wrong line format, with; information and tab" with self.assertRaises(Exception) as context: # noqa