From 7f863a0d6c145ec442ca4dcfb8a55f4bebfb80e4 Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Mon, 9 Dec 2019 17:37:00 +0100
Subject: [PATCH] add eggnog line parser

---
 .../common/utils/parsers/__init__.py          |  1 +
 .../metagenedb/common/utils/parsers/eggnog.py | 22 +++++++++++++++++++
 .../common/utils/parsers/test_eggnog.py       | 21 ++++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 backend/metagenedb/common/utils/parsers/eggnog.py
 create mode 100644 backend/metagenedb/common/utils/parsers/test_eggnog.py

diff --git a/backend/metagenedb/common/utils/parsers/__init__.py b/backend/metagenedb/common/utils/parsers/__init__.py
index 7c8b8f5..92f59fa 100644
--- a/backend/metagenedb/common/utils/parsers/__init__.py
+++ b/backend/metagenedb/common/utils/parsers/__init__.py
@@ -1,3 +1,4 @@
+from .eggnog import EggNogAnnotationLineParser  # noqa
 from .igc import IGCLineParser  # noqa
 from .kegg import KEGGLineParser  # noqa
 from .ncbi_taxonomy import NCBITaxonomyLineParser  # noqa
diff --git a/backend/metagenedb/common/utils/parsers/eggnog.py b/backend/metagenedb/common/utils/parsers/eggnog.py
new file mode 100644
index 0000000..4397493
--- /dev/null
+++ b/backend/metagenedb/common/utils/parsers/eggnog.py
@@ -0,0 +1,22 @@
+import logging
+
+_LOGGER = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+class EggNogAnnotationLineParser(object):
+    """Line parser for Eggnog annotations.tsv files."""
+
+    @staticmethod
+    def ko_list(line):
+        """
+        Parse line from Eggnog annotations.tsv file to return organized dict
+        with 'functional_category', 'function_id' and 'name' taken from the
+        3rd, 2nd and 4th tab-separated fields; failures are logged and re-raised.
+        """
+        try:
+            # Strip only the line terminator: it must not leak into 'name', and empty fields must survive.
+            elements = line.rstrip('\r\n').split('\t')
+            return {'functional_category': elements[2], 'function_id': elements[1], 'name': elements[3]}
+        except Exception:
+            _LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from eggnog annotations.tsv?")
+            raise
diff --git a/backend/metagenedb/common/utils/parsers/test_eggnog.py b/backend/metagenedb/common/utils/parsers/test_eggnog.py
new file mode 100644
index 0000000..0f48ef7
--- /dev/null
+++ b/backend/metagenedb/common/utils/parsers/test_eggnog.py
@@ -0,0 +1,21 @@
+from unittest import TestCase
+
+from metagenedb.common.utils.parsers import EggNogAnnotationLineParser
+
+
+class TestEggNogAnnotationLineParser(TestCase):
+
+    def test_ko_list(self):
+        """A well-formed 4-field line yields the id, category and name fields."""
+        ko_line = "1\t28H54\tK\ttranslational termination"
+        expected_dict = {
+            'function_id': "28H54",
+            'name': "translational termination",
+            'functional_category': "K",
+        }
+        self.assertDictEqual(EggNogAnnotationLineParser.ko_list(ko_line), expected_dict)
+
+    def test_ko_list_wrong_format(self):
+        """A line with no tab separator has too few fields, so indexing raises IndexError."""
+        with self.assertRaises(IndexError):
+            EggNogAnnotationLineParser.ko_list("This is a wrong line format, with; information   and tab")
-- 
GitLab