# Parser originally located at:
#   backend/metagenedb/common/utils/parsers/eggnog.py
# The package ``__init__`` re-exports ``EggNogAnnotationLineParser`` next to
# the IGC, KEGG and NCBI taxonomy line parsers.
import logging
from unittest import TestCase

_LOGGER = logging.getLogger(__name__)


class EggNogAnnotationLineParser(object):
    """Line-level parser for tab-separated EggNOG ``annotations.tsv`` files."""

    @staticmethod
    def ko_list(line):
        """Parse one line of an EggNOG annotations.tsv file into a dict.

        The line is split on tabs; the first column is ignored and the next
        three are mapped as follows:

        * column 1 -> ``function_id``
        * column 2 -> ``functional_category``
        * column 3 -> ``name``

        A trailing line terminator (``\\n`` or ``\\r\\n``) is removed before
        splitting so that it does not end up inside ``name``.

        Args:
            line: A single tab-separated line, with or without its line ending.

        Returns:
            A dict with the keys ``functional_category``, ``function_id`` and
            ``name``.

        Raises:
            Exception: Whatever error the parsing triggered is logged and then
                re-raised unchanged (an ``IndexError`` when the line has fewer
                than four tab-separated columns).
        """
        try:
            # Strip only the line terminator: a blanket ``strip()`` would also
            # remove a trailing tab and drop an empty last column.
            elements = line.rstrip('\r\n').split('\t')
            return {
                'functional_category': elements[2],
                'function_id': elements[1],
                'name': elements[3],
            }
        except Exception:
            _LOGGER.error(
                f"Could not parse: {line.rstrip()}. "
                "Are you sure it comes from eggnog annotations.tsv?"
            )
            raise


# Tests originally located at:
#   backend/metagenedb/common/utils/parsers/test_eggnog.py
class TestEggNogAnnotationLineParser(TestCase):
    """Unit tests for ``EggNogAnnotationLineParser.ko_list``."""

    def test_ko_list(self):
        ko_line = "1\t28H54\tK\ttranslational termination"
        expected_dict = {
            'function_id': "28H54",
            'name': "translational termination",
            'functional_category': "K"
        }
        test_dict = EggNogAnnotationLineParser.ko_list(ko_line)
        self.assertDictEqual(test_dict, expected_dict)

    def test_ko_list_strips_line_ending(self):
        # Lines read straight from a file still carry their terminator; it
        # must not leak into the last parsed field.
        expected_dict = {
            'function_id': "28H54",
            'name': "translational termination",
            'functional_category': "K"
        }
        for ending in ("\n", "\r\n"):
            ko_line = "1\t28H54\tK\ttranslational termination" + ending
            test_dict = EggNogAnnotationLineParser.ko_list(ko_line)
            self.assertDictEqual(test_dict, expected_dict)

    def test_ko_list_wrong_format(self):
        ko_line = "This is a wrong line format, with; information and tab"
        with self.assertRaises(Exception):
            EggNogAnnotationLineParser.ko_list(ko_line)