From 7c1d40e363f4f2e24577f9d42e3bae4b9bf9f47d Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Tue, 10 Dec 2019 10:55:21 +0100
Subject: [PATCH] improve eggnog creation

---
 .../commands/create_update_eggnog.py          | 29 +++++++++++++------
 .../migrations/0014_eggnog_name_length.py     | 22 ++++++++++++++
 .../0014_remove_eggnog_long_name.py           | 17 -----------
 .../0015_increase_function_name_max_length.py | 18 ------------
 .../apps/catalog/models/function.py           |  2 +-
 .../metagenedb/common/utils/parsers/eggnog.py |  2 +-
 .../common/utils/parsers/test_eggnog.py       | 13 ++++++++-
 7 files changed, 56 insertions(+), 47 deletions(-)
 create mode 100644 backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py
 delete mode 100644 backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py
 delete mode 100644 backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py

diff --git a/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py b/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
index 56f5f7a..75a4d90 100644
--- a/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
+++ b/backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
@@ -1,7 +1,7 @@
 import logging
 
 from django.core.management.base import BaseCommand
-from django.db import IntegrityError
+from django.core.exceptions import ValidationError
 
 from metagenedb.apps.catalog.models import EggNog, EggNogFunctionalCategory
 from metagenedb.common.utils.chunks import file_len
@@ -21,6 +21,7 @@ class ImportEggNog(object):
         self.updated_count = 0
         self.skipped_count = 0
         self.skipped_ids = []
+        self.skipped_errors = []
 
     def _build_functional_category_dict(self):
         all_categories = EggNogFunctionalCategory.objects.all()
@@ -43,16 +44,29 @@ class ImportEggNog(object):
                 payload = {k: v for k, v in eggnog_dict.items() if v != ""}
                 try:
                     eggnog = EggNog(**payload)
+                    eggnog.full_clean()
                     eggnog.save()
                     self.created_count += 1
-                except IntegrityError:
-                    try:
-                        eggnog = EggNog.objects.get(function_id=payload.get('function_id'))
-                        for k, v in payload.items():
-                            setattr(eggnog, k, v)
-                        eggnog.save()
-                        self.updated_count += 1
-                    except IntegrityError:
+                except ValidationError as validation_error:
+                    if 'function_id' in validation_error.error_dict.keys():
+                        try:
+                            eggnog = EggNog.objects.get(function_id=payload.get('function_id'))
+                            for k, v in payload.items():
+                                setattr(eggnog, k, v)
+                            eggnog.full_clean()
+                            eggnog.save()
+                            self.updated_count += 1
+                        # A 'function_id' error is not always a duplicate: a blank or
+                        # over-long id fails validation too, and get() then raises
+                        # DoesNotExist. Skip the row instead of aborting the import.
+                        except (ValidationError, EggNog.DoesNotExist) as validation_error:
+                            logger.error(validation_error)
+                            self.skipped_errors.append(validation_error)
+                            self.skipped_ids.append(payload.get('function_id'))
+                            self.skipped_count += 1
+                    else:
+                        logger.error(validation_error)
+                        self.skipped_errors.append(validation_error)
                         self.skipped_ids.append(payload.get('function_id'))
                         self.skipped_count += 1
                 self.processed_count += 1
diff --git a/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py b/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py
new file mode 100644
index 0000000..5a39f54
--- /dev/null
+++ b/backend/metagenedb/apps/catalog/migrations/0014_eggnog_name_length.py
@@ -0,0 +1,22 @@
+# Generated by Django 3.0 on 2019-12-09 18:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalog', '0013_plural_eggnog'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='eggnog',
+            name='long_name',
+        ),
+        migrations.AlterField(
+            model_name='function',
+            name='name',
+            field=models.CharField(blank=True, max_length=500),
+        ),
+    ]
diff --git a/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py b/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py
deleted file mode 100644
index f2b03bf..0000000
--- a/backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Generated by Django 3.0 on 2019-12-09 17:02
-
-from django.db import migrations
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('catalog', '0013_plural_eggnog'),
-    ]
-
-    operations = [
-        migrations.RemoveField(
-            model_name='eggnog',
-            name='long_name',
-        ),
-    ]
diff --git a/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py b/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py
deleted file mode 100644
index 5627b6c..0000000
--- a/backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Generated by Django 3.0 on 2019-12-09 17:06
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('catalog', '0014_remove_eggnog_long_name'),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name='function',
-            name='name',
-            field=models.CharField(max_length=200),
-        ),
-    ]
diff --git a/backend/metagenedb/apps/catalog/models/function.py b/backend/metagenedb/apps/catalog/models/function.py
index 2723416..cf27a27 100644
--- a/backend/metagenedb/apps/catalog/models/function.py
+++ b/backend/metagenedb/apps/catalog/models/function.py
@@ -12,7 +12,7 @@ class Function(models.Model):
     ]
 
     function_id = models.CharField(max_length=100, db_index=True, unique=True)
-    name = models.CharField(max_length=200)
+    name = models.CharField(max_length=500, blank=True)
     source = models.CharField(max_length=10, choices=SOURCE_CHOICES, default=UNDEFINED)
 
     def __str__(self):
diff --git a/backend/metagenedb/common/utils/parsers/eggnog.py b/backend/metagenedb/common/utils/parsers/eggnog.py
index 70b8aeb..a24485f 100644
--- a/backend/metagenedb/common/utils/parsers/eggnog.py
+++ b/backend/metagenedb/common/utils/parsers/eggnog.py
@@ -15,7 +15,7 @@ class EggNogAnnotationLineParser(object):
             return {
                 'functional_category': elements[2],
                 'function_id': elements[1],
-                'name': elements[3].rstrip(),
+                'name': elements[3].rstrip().split('.')[0],
             }
         except Exception:
             _LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from eggnog annotations.tsv?")
diff --git a/backend/metagenedb/common/utils/parsers/test_eggnog.py b/backend/metagenedb/common/utils/parsers/test_eggnog.py
index ab22354..3dce9a6 100644
--- a/backend/metagenedb/common/utils/parsers/test_eggnog.py
+++ b/backend/metagenedb/common/utils/parsers/test_eggnog.py
@@ -10,7 +10,7 @@ class TestEggNogAnnotationLineParser(TestCase):
         expected_dict = {
                 'function_id': "28H54",
                 'name': "translational termination",
-                'functional_category': "K"
+                'functional_category': "K"
             }
         test_dict = EggNogAnnotationLineParser.get_dict(ko_line)
         self.assertDictEqual(test_dict, expected_dict)
@@ -25,6 +25,16 @@ class TestEggNogAnnotationLineParser(TestCase):
         test_dict = EggNogAnnotationLineParser.get_dict(ko_line)
         self.assertDictEqual(test_dict, expected_dict)
 
+    def test_get_dict_long_name(self):
+        ko_line = "1\t28H50\tS\tGlucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose. In the absence of glucose, it functions as a transcriptional repressor, whereas high concentrations of glucose cause it to function as a transcriptional activator. In cells growing on low levels of glucose, has a neutral role, neither repressing nor activating transcription (By similarity)\n"  # noqa
+        expected_dict = {
+                'function_id': "28H50",
+                'name': "Glucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose",  # noqa
+                'functional_category': "S"
+            }
+        test_dict = EggNogAnnotationLineParser.get_dict(ko_line)
+        self.assertDictEqual(test_dict, expected_dict)
+
     def test_get_dict_wrong_format(self):
         ko_line = "This is a wrong line format, with; information   and tab"
         with self.assertRaises(Exception) as context:  # noqa
-- 
GitLab