diff --git a/backend/metagenedb/utils/__init__.py b/backend/metagenedb/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/backend/metagenedb/utils/parsers.py b/backend/metagenedb/utils/parsers.py
new file mode 100644
index 0000000000000000000000000000000000000000..a40e819b8f409fdee719cc154351efb9a978504c
--- /dev/null
+++ b/backend/metagenedb/utils/parsers.py
@@ -0,0 +1,50 @@
+import logging
+
+logging.basicConfig(level=logging.INFO)  # NOTE(review): configures the root logger at import time
+_LOGGER = logging.getLogger(__name__)  # module-level logger used by the parsers in this file
+
+
+def parse_ncbi_taxonomy_node(line):
+    """
+    Parse one line of the NCBI taxonomy nodes.dmp file into a dict.
+
+    The line is split on '|' and every field is stripped of surrounding
+    whitespace. Values are kept as strings; no type conversion is done.
+    Field meaning, from the NCBI taxdump documentation:
+
+        tax_id                         -- node id in GenBank taxonomy database
+        parent_tax_id                  -- parent node id in GenBank taxonomy database
+        rank                           -- rank of this node (superkingdom, kingdom, ...)
+        embl_code                      -- locus-name prefix; not unique
+        division_id                    -- see division.dmp file
+        inherited_div_flag             -- 1 if node inherits division from parent
+        genetic_code_id                -- see gencode.dmp file
+        inherited_GC_flag              -- 1 if node inherits genetic code from parent
+        mitochondrial_genetic_code_id  -- see gencode.dmp file
+        inherited_MGC_flag             -- 1 if node inherits mitochondrial gencode from parent
+        GenBank_hidden_flag            -- 1 if name is suppressed in GenBank entry lineage
+        hidden_subtree_root_flag       -- 1 if this subtree has no sequence data yet
+        comments                       -- free-text comments and citations
+
+    :param line: raw line read from nodes.dmp
+    :return: dict mapping each field name above to its stripped string value
+    :raises IndexError: if the line holds fewer than 13 '|'-separated fields
+    """
+    # Output keys, in the column order of nodes.dmp.
+    field_names = (
+        "tax_id", "parent_tax_id", "rank", "embl_code", "division_id",
+        "inherited_div_flag", "genetic_code_id", "inherited_GC_flag",
+        "mitochondrial_genetic_code_id", "inherited_MGC_flag",
+        "GenBank_hidden_flag", "hidden_subtree_root_flag", "comments",
+    )
+    # Each record ends with a trailing '|', so split() yields one extra empty
+    # element after the last field; it is simply never indexed.
+    elements = line.rstrip().split('|')
+    try:
+        # Index explicitly (rather than zip) so a short line raises IndexError
+        # instead of silently yielding a partial dict.
+        return {name: elements[i].strip() for i, name in enumerate(field_names)}
+    except IndexError:
+        _LOGGER.error(f"Could not parse: {line.rstrip()}. Are you sure it comes from nodes.dmp file?")
+        # Bare raise re-raises the active exception with its original traceback.
+        raise
diff --git a/backend/metagenedb/utils/test_parsers.py b/backend/metagenedb/utils/test_parsers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c895864f84920a5bfc9316ab8ec665728afa3b31
--- /dev/null
+++ b/backend/metagenedb/utils/test_parsers.py
@@ -0,0 +1,31 @@
+from unittest import TestCase
+
+from metagenedb.utils.parsers import parse_ncbi_taxonomy_node
+
+
+class TestNCBITaxonomyNodeParser(TestCase):
+
+    def test_parse_ncbi_taxonomy_node(self):
+        # Tabs are written as \t escapes so the nodes.dmp "\t|\t" separators stay visible.
+        node_line = "6\t|\t335928\t|\tgenus\t|\t\t|\t0\t|\t1\t|\t11\t|\t1\t|\t0\t|\t1\t|\t0\t|\t0\t|\t\t|\n"
+        expected_dict = {
+            "tax_id": "6",
+            "parent_tax_id": "335928",
+            "rank": "genus",
+            "embl_code": "",
+            "division_id": "0",
+            "inherited_div_flag": "1",
+            "genetic_code_id": "11",
+            "inherited_GC_flag": "1",
+            "mitochondrial_genetic_code_id": "0",
+            "inherited_MGC_flag": "1",
+            "GenBank_hidden_flag": "0",
+            "hidden_subtree_root_flag": "0",
+            "comments": ""
+        }
+        self.assertDictEqual(parse_ncbi_taxonomy_node(node_line), expected_dict)
+
+    def test_parse_wrong_line_format(self):
+        # A line with too few '|' separators must raise IndexError specifically.
+        with self.assertRaises(IndexError):
+            parse_ncbi_taxonomy_node("This is a wrong line format.")