From 7f863a0d6c145ec442ca4dcfb8a55f4bebfb80e4 Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Mon, 9 Dec 2019 17:37:00 +0100
Subject: [PATCH] add eggnog line parser

---
Reviewer note: the parser now strips the line terminator before splitting,
so 'name' no longer keeps a trailing newline; a regression test covers it.
The commit id above and the blob ids on the "index" lines below are those of
the original revision and were not regenerated.

 .../common/utils/parsers/__init__.py          |  1 +
 .../metagenedb/common/utils/parsers/eggnog.py | 30 ++++++++++++++++++++
 .../common/utils/parsers/test_eggnog.py       | 31 +++++++++++++++++++++
 3 files changed, 62 insertions(+)
 create mode 100644 backend/metagenedb/common/utils/parsers/eggnog.py
 create mode 100644 backend/metagenedb/common/utils/parsers/test_eggnog.py

diff --git a/backend/metagenedb/common/utils/parsers/__init__.py b/backend/metagenedb/common/utils/parsers/__init__.py
index 7c8b8f5..92f59fa 100644
--- a/backend/metagenedb/common/utils/parsers/__init__.py
+++ b/backend/metagenedb/common/utils/parsers/__init__.py
@@ -1,3 +1,4 @@
+from .eggnog import EggNogAnnotationLineParser  # noqa
 from .igc import IGCLineParser  # noqa
 from .kegg import KEGGLineParser  # noqa
 from .ncbi_taxonomy import NCBITaxonomyLineParser  # noqa
diff --git a/backend/metagenedb/common/utils/parsers/eggnog.py b/backend/metagenedb/common/utils/parsers/eggnog.py
new file mode 100644
index 0000000..4397493
--- /dev/null
+++ b/backend/metagenedb/common/utils/parsers/eggnog.py
@@ -0,0 +1,30 @@
+import logging
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class EggNogAnnotationLineParser(object):
+    """Parser for single lines of an Eggnog annotations.tsv file."""
+
+    @staticmethod
+    def ko_list(line):
+        """
+        Parse line from Eggnog annotations.tsv file to return organized dict
+
+        :param line: tab-separated line; the 2nd, 3rd and 4th fields are read
+        :return: dict with 'functional_category', 'function_id' and 'name'
+        :raises Exception: re-raised after logging when the line cannot be
+            parsed (e.g. IndexError when it has fewer than 4 fields)
+        """
+        try:
+            # Strip only the line terminator: a bare rstrip() would also eat
+            # trailing tabs and thereby drop an empty last field.
+            elements = line.rstrip('\r\n').split('\t')
+            return {
+                'functional_category': elements[2],
+                'function_id': elements[1],
+                'name': elements[3],
+            }
+        except Exception:
+            _LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from eggnog annotations.tsv?")
+            raise
diff --git a/backend/metagenedb/common/utils/parsers/test_eggnog.py b/backend/metagenedb/common/utils/parsers/test_eggnog.py
new file mode 100644
index 0000000..0f48ef7
--- /dev/null
+++ b/backend/metagenedb/common/utils/parsers/test_eggnog.py
@@ -0,0 +1,31 @@
+from unittest import TestCase
+
+from metagenedb.common.utils.parsers import EggNogAnnotationLineParser
+
+
+class TestEggNogAnnotationLineParser(TestCase):
+
+    def test_ko_list(self):
+        ko_line = "1\t28H54\tK\ttranslational termination"
+        expected_dict = {
+            'function_id': "28H54",
+            'name': "translational termination",
+            'functional_category': "K"
+        }
+        test_dict = EggNogAnnotationLineParser.ko_list(ko_line)
+        self.assertDictEqual(test_dict, expected_dict)
+
+    def test_ko_list_trailing_newline(self):
+        ko_line = "1\t28H54\tK\ttranslational termination\n"
+        expected_dict = {
+            'function_id': "28H54",
+            'name': "translational termination",
+            'functional_category': "K"
+        }
+        test_dict = EggNogAnnotationLineParser.ko_list(ko_line)
+        self.assertDictEqual(test_dict, expected_dict)
+
+    def test_ko_list_wrong_format(self):
+        ko_line = "This is a wrong line format, with; information and tab"
+        with self.assertRaises(Exception) as context:  # noqa
+            EggNogAnnotationLineParser.ko_list(ko_line)
-- 
GitLab