Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
c78cb131
Commit
c78cb131
authored
Dec 11, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
parser for eggnog functional categories
parent
fe09bec9
Pipeline
#19770
failed with stages
in 2 minutes and 18 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
View file @
c78cb131
...
...
@@ -5,7 +5,7 @@ from django.core.exceptions import ValidationError
from
metagenedb.apps.catalog.models
import
EggNog
,
EggNogFunctionalCategory
from
metagenedb.common.utils.chunks
import
file_len
from
metagenedb.common.utils.parsers
import
EggN
og
AnnotationLineParser
from
metagenedb.common.utils.parsers
import
EggN
OG
AnnotationLineParser
logging
.
basicConfig
(
format
=
'[%(asctime)s] %(levelname)s:%(name)s:%(message)s'
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -15,7 +15,7 @@ class ImportEggNog(object):
def
__init__
(
self
,
file_path
):
self
.
annotation_file
=
file_path
self
.
eggnog_parser
=
EggN
og
AnnotationLineParser
()
self
.
eggnog_parser
=
EggN
OG
AnnotationLineParser
()
self
.
processed_count
=
0
self
.
created_count
=
0
self
.
updated_count
=
0
...
...
backend/metagenedb/common/utils/parsers/__init__.py
View file @
c78cb131
from
.eggnog
import
EggN
og
AnnotationLineParser
# noqa
from
.eggnog
import
EggN
OG
AnnotationLineParser
# noqa
from
.igc
import
IGCLineParser
# noqa
from
.kegg
import
KEGGLineParser
# noqa
from
.ncbi_taxonomy
import
NCBITaxonomyLineParser
# noqa
backend/metagenedb/common/utils/parsers/base.py
0 → 100644
View file @
c78cb131
class FileParser:
    """
    Base class for parsers that work on a whole file.

    Subclasses override `handle_parsing` to process the opened file;
    `parse` takes care of opening and closing it.
    """

    def __init__(self, file_path):
        # Path of the file that `parse` opens.
        self.file_path = file_path

    def handle_parsing(self, file_handler):
        """
        Default handler: print every line without its trailing whitespace.

        This method needs to be overridden to really handle the parsing.
        """
        for line in file_handler:
            print(line.rstrip())

    def parse(self):
        """
        Open `self.file_path` in text read mode and hand it to `handle_parsing`.

        Returns whatever `handle_parsing` returns (None for the default
        handler), so subclasses that build a result can be used via `parse`.
        """
        with open(self.file_path, 'r') as file_handler:
            # Propagate the handler's result instead of silently dropping it.
            return self.handle_parsing(file_handler)
backend/metagenedb/common/utils/parsers/eggnog.py
View file @
c78cb131
import
logging
from
.base
import
FileParser
_LOGGER
=
logging
.
getLogger
(
__name__
)
class
EggN
og
AnnotationLineParser
(
object
)
:
class
EggN
OG
AnnotationLineParser
:
@
staticmethod
def
get_dict
(
line
):
...
...
@@ -19,4 +21,26 @@ class EggNogAnnotationLineParser(object):
}
except
Exception
:
_LOGGER
.
error
(
f
"Could not parse:
{
line
.
rstrip
()
}
. Are you sure it comes from eggnog annotations.tsv?"
)
raise
raise
Exception
(
"Impossible to parse given line as eggnog from annotation.tsv file"
)
class EggNOGFunctionalCategoriesParser(FileParser):
    """
    Parse functional categories file from EggNOG
    """

    def handle_parsing(self, file_handler):
        """
        Build a list of category dicts from the lines of `file_handler`.

        Non-empty lines starting with '[' are read as categories; any other
        non-empty line becomes the group applied to the categories that
        follow it. Categories seen before any group line get "Unknown".

        Returns a list of dicts with keys 'category_id', 'name' and 'group'.
        """
        group_name = "Unknown"
        categories = []
        for raw_line in file_handler:
            stripped = raw_line.strip()
            if not stripped:
                # Blank lines carry no information
                continue
            if not stripped.startswith('['):
                # It is a group of a category
                group_name = stripped
                continue
            # It is a category: first token is the bracketed id, rest is the name
            parts = stripped.split(maxsplit=1)
            entry = {
                'category_id': parts[0][1],  # character right after '['
                'name': parts[1],
                'group': group_name,
            }
            categories.append(entry)
        return categories
backend/metagenedb/common/utils/parsers/test_eggnog.py
View file @
c78cb131
from
unittest
import
TestCase
from
metagenedb.common.utils.parsers
import
EggNogAnnotationLineParser
from
metagenedb.common.utils.parsers.eggnog
import
(
EggNOGAnnotationLineParser
,
EggNOGFunctionalCategoriesParser
)
class
TestEggN
og
AnnotationLineParser
(
TestCase
):
class
TestEggN
OG
AnnotationLineParser
(
TestCase
):
def
test_get_dict
(
self
):
ko_line
=
"1
\t
28H54
\t
K
\t
translational termination
\n
"
...
...
@@ -12,7 +15,7 @@ class TestEggNogAnnotationLineParser(TestCase):
'name'
:
"translational termination"
,
'functional_categories'
:
[
"K"
]
}
test_dict
=
EggN
og
AnnotationLineParser
.
get_dict
(
ko_line
)
test_dict
=
EggN
OG
AnnotationLineParser
.
get_dict
(
ko_line
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_get_dict_no_name
(
self
):
...
...
@@ -22,7 +25,7 @@ class TestEggNogAnnotationLineParser(TestCase):
'name'
:
""
,
'functional_categories'
:
[
"S"
]
}
test_dict
=
EggN
og
AnnotationLineParser
.
get_dict
(
ko_line
)
test_dict
=
EggN
OG
AnnotationLineParser
.
get_dict
(
ko_line
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_get_dict_long_name
(
self
):
...
...
@@ -32,7 +35,7 @@ class TestEggNogAnnotationLineParser(TestCase):
'name'
:
"Glucose-responsive transcription factor that regulates expression of several glucose transporter (HXT) genes in response to glucose"
,
# noqa
'functional_categories'
:
[
"S"
]
}
test_dict
=
EggN
og
AnnotationLineParser
.
get_dict
(
ko_line
)
test_dict
=
EggN
OG
AnnotationLineParser
.
get_dict
(
ko_line
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_get_dict_multi_categories
(
self
):
...
...
@@ -42,10 +45,27 @@ class TestEggNogAnnotationLineParser(TestCase):
'name'
:
"translational termination"
,
'functional_categories'
:
[
"K"
,
"S"
]
}
test_dict
=
EggN
og
AnnotationLineParser
.
get_dict
(
ko_line
)
test_dict
=
EggN
OG
AnnotationLineParser
.
get_dict
(
ko_line
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_get_dict_wrong_format
(
self
):
ko_line
=
"This is a wrong line format, with; information and tab"
with
self
.
assertRaises
(
Exception
)
as
context
:
# noqa
EggNogAnnotationLineParser
.
get_dict
(
ko_line
)
EggNOGAnnotationLineParser
.
get_dict
(
ko_line
)
class TestEggNOGFunctionalCategoriesParser(TestCase):
    """Check the functional categories parser on an in-memory list of lines."""

    def test_parse_file(self):
        # A plain list of lines stands in for an open file handle;
        # the "test" path is never opened because parse() is not called.
        lines = [
            "FIRST GROUP\n",
            " [A] Categorie name A\n",
            " [B] Categorie name B\n",
            "\n",
            "SECOND GROUP\n",
            " [C] Categorie name C\n",
            " [D] Categorie name D\n",
        ]
        expected_rows = (
            ('A', 'FIRST GROUP', 'Categorie name A'),
            ('B', 'FIRST GROUP', 'Categorie name B'),
            ('C', 'SECOND GROUP', 'Categorie name C'),
            ('D', 'SECOND GROUP', 'Categorie name D'),
        )
        expected_list = [
            {'category_id': category_id, 'group': group, 'name': name}
            for category_id, group, name in expected_rows
        ]
        categories_parser = EggNOGFunctionalCategoriesParser("test")
        parsed = categories_parser.handle_parsing(lines)
        self.assertListEqual(parsed, expected_list)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment