Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
793e61eb
Commit
793e61eb
authored
Dec 11, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
add script to create functional categories from manage
parent
e7f37e19
Pipeline
#19789
passed with stages
in 2 minutes and 24 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/management/commands/create_eggnog_functional_cat.py
0 → 100644
View file @
793e61eb
import
logging
from
django.core.management.base
import
BaseCommand
from
django.core.exceptions
import
ValidationError
from
metagenedb.apps.catalog.models
import
EggNogFunctionalCategory
from
metagenedb.common.utils.parsers.eggnog
import
EggNOGFunctionalCategoriesParser
# Module-level logger used by the importer and the management command below.
logger = logging.getLogger(__name__)

# Configure the root handler's output format; the level is left at its default
# and is only adjusted later through Command.set_logger_level.
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s',
)
class ImportEggNOGFunctionalCategories(object):
    """Create or update EggNOG functional categories from a parsed file.

    Each entry produced by the parser is first created as a new ``MODEL``
    instance. If validation rejects the entry because of its
    ``LOOKUP_FIELD``, the existing instance is updated instead. Entries that
    can be neither created nor updated are logged and recorded as skipped.
    """

    # Field used to look up an already existing instance.
    LOOKUP_FIELD = 'category_id'
    MODEL = EggNogFunctionalCategory
    # Maps the group label coming from the parser to the value stored on the model.
    GROUP_MAPPING = {
        'Information storage and processing': 'info_storage_processing',
        'Cellular processes and signaling': 'cellular_processes_signaling',
        'Metabolism': 'metabolism',
        'Poorly characterized': 'poorly_characterized',
    }

    def __init__(self, file_path):
        """Prepare the parser for ``file_path`` and reset all counters."""
        self.parser = EggNOGFunctionalCategoriesParser(file_path)
        self.processed_count = 0
        self.created_count = 0
        self.updated_count = 0
        self.skipped_count = 0
        self.skipped_ids = []
        self.skipped_errors = []

    def _create_instance(self, payload):
        """Validate and save a new ``MODEL`` instance built from ``payload``.

        Raises:
            ValidationError: if ``full_clean`` rejects the instance.
        """
        instance = self.MODEL(**payload)
        instance.full_clean()
        instance.save()
        self.created_count += 1

    def _update_instance(self, payload):
        """Overwrite the existing instance matching ``payload`` and save it.

        Raises:
            ValidationError: if ``full_clean`` rejects the updated instance.
            MODEL.DoesNotExist: if no instance matches the lookup field.
        """
        lookup = {self.LOOKUP_FIELD: payload.get(self.LOOKUP_FIELD)}
        instance = self.MODEL.objects.get(**lookup)
        for k, v in payload.items():
            setattr(instance, k, v)
        instance.full_clean()
        instance.save()
        self.updated_count += 1

    def _handle_error(self, payload, error):
        """Log ``error`` and record ``payload`` as skipped."""
        logger.error(error)
        self.skipped_errors.append(error)
        self.skipped_ids.append(payload.get(self.LOOKUP_FIELD))
        self.skipped_count += 1

    def _update_or_skip(self, payload, creation_error):
        """Fall back to an update after a failed creation, or skip the entry."""
        # ``error_dict`` only exists when the ValidationError was built from a
        # dict of per-field errors, so do not assume it is present.
        failed_fields = getattr(creation_error, 'error_dict', {})
        if self.LOOKUP_FIELD not in failed_fields:
            # The failure is unrelated to the lookup field, so updating would
            # not help: record it instead of silently dropping the entry.
            self._handle_error(payload, creation_error)
            return
        try:
            self._update_instance(payload)
        except (ValidationError, self.MODEL.DoesNotExist) as update_error:
            # DoesNotExist means the lookup-field error was not caused by an
            # already existing entry; skip it rather than abort the import.
            self._handle_error(payload, update_error)

    def update_group_name(self, functional_category):
        """Replace the ``'group'`` label by its ``GROUP_MAPPING`` value.

        The dict is modified in place and also returned. A label missing from
        ``GROUP_MAPPING`` is replaced by ``None``.
        """
        functional_category['group'] = self.GROUP_MAPPING.get(functional_category['group'])
        return functional_category

    def load_all(self):
        """Import every parsed functional category and log a summary."""
        for functional_category in self.parser.parse():
            functional_category = self.update_group_name(functional_category)
            try:
                self._create_instance(functional_category)
            except ValidationError as creation_error:
                self._update_or_skip(functional_category, creation_error)
            self.processed_count += 1
        logger.info("[DONE] %s EggNOG functional categories created.", self.created_count)
        logger.info("[DONE] %s EggNOG functional categories updated.", self.updated_count)
        logger.info("[DONE] %s EggNOG functional categories skipped. List: %s",
                    self.skipped_count, self.skipped_ids)
class Command(BaseCommand):
    """Management command loading EggNOG functional categories from a file."""

    help = 'Create or update all EggNOG functional categories from COG_functional_categories.txt file.'

    def add_arguments(self, parser):
        """Register the positional path to the functional categories file."""
        parser.add_argument(
            'functional_categories',
            help='COG_functional_categories.txt file from EggNOG',
        )

    def set_logger_level(self, verbosity):
        """Raise the module logger's level of detail according to ``verbosity``.

        Above 2 selects DEBUG, above 1 selects INFO; otherwise the logger is
        left untouched.
        """
        # Thresholds are checked from the most to the least verbose.
        thresholds = ((2, logging.DEBUG), (1, logging.INFO))
        for minimum, level in thresholds:
            if verbosity > minimum:
                logger.setLevel(level)
                break

    def handle(self, *args, **options):
        """Set the log level, then import every functional category."""
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        source_file = options['functional_categories']
        importer = ImportEggNOGFunctionalCategories(source_file)
        importer.load_all()
backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
View file @
793e61eb
...
...
@@ -11,7 +11,7 @@ logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger
=
logging
.
getLogger
(
__name__
)
class
ImportEggN
og
(
object
):
class
ImportEggN
OG
(
object
):
def
__init__
(
self
,
file_path
):
self
.
annotation_file
=
file_path
...
...
@@ -71,20 +71,20 @@ class ImportEggNog(object):
eggnog
.
save
()
self
.
processed_count
+=
1
if
self
.
processed_count
%
1000
==
0
:
logger
.
info
(
"%s/%s EggN
og
processed so far..."
,
self
.
processed_count
,
self
.
total_eggnog_nb
)
logger
.
info
(
"%s/%s EggN
OG
processed so far..."
,
self
.
processed_count
,
self
.
total_eggnog_nb
)
if
test
:
break
logger
.
info
(
"[DONE] %s/%s EggN
og
created."
,
self
.
created_count
,
self
.
total_eggnog_nb
)
logger
.
info
(
"[DONE] %s/%s EggN
og
updated."
,
self
.
updated_count
,
self
.
total_eggnog_nb
)
logger
.
info
(
"[DONE] %s/%s EggN
og
skipped. List: %s"
,
self
.
skipped_count
,
self
.
total_eggnog_nb
,
logger
.
info
(
"[DONE] %s/%s EggN
OG
created."
,
self
.
created_count
,
self
.
total_eggnog_nb
)
logger
.
info
(
"[DONE] %s/%s EggN
OG
updated."
,
self
.
updated_count
,
self
.
total_eggnog_nb
)
logger
.
info
(
"[DONE] %s/%s EggN
OG
skipped. List: %s"
,
self
.
skipped_count
,
self
.
total_eggnog_nb
,
self
.
skipped_ids
)
class
Command
(
BaseCommand
):
help
=
'Create or update all Egg
nog
entries from annotations.tsv file.'
help
=
'Create or update all Egg
NOG
entries from annotations.tsv file.'
def
add_arguments
(
self
,
parser
):
parser
.
add_argument
(
'annotation'
,
help
=
'annotations.tsv file from EggN
og
'
)
parser
.
add_argument
(
'annotation'
,
help
=
'annotations.tsv file from EggN
OG
'
)
parser
.
add_argument
(
'--test'
,
action
=
'store_true'
,
help
=
'Run only on first 1000 entries.'
)
def
set_logger_level
(
self
,
verbosity
):
...
...
@@ -95,5 +95,5 @@ class Command(BaseCommand):
def
handle
(
self
,
*
args
,
**
options
):
self
.
set_logger_level
(
int
(
options
[
'verbosity'
]))
import_eggnog
=
ImportEggN
og
(
options
[
'annotation'
])
import_eggnog
=
ImportEggN
OG
(
options
[
'annotation'
])
import_eggnog
.
load_all
(
test
=
options
[
'test'
])
backend/metagenedb/apps/catalog/management/commands/test_create_eggnog_functional_cat.py
0 → 100644
View file @
793e61eb
from
unittest
import
TestCase
from
metagenedb.apps.catalog.management.commands.create_eggnog_functional_cat
import
ImportEggNOGFunctionalCategories
class TestUpdateGroupName(TestCase):
    """Check that each EggNOG group label is mapped to its stored value."""

    def _assert_group_mapped(self, raw_group, expected_group):
        """Run ``update_group_name`` on a payload and compare the whole dict."""
        importer = ImportEggNOGFunctionalCategories('test')
        payload = {'category_id': 'A', 'name': 'Test', 'group': raw_group}
        expected_output = {'category_id': 'A', 'name': 'Test', 'group': expected_group}
        self.assertDictEqual(importer.update_group_name(payload), expected_output)

    def test_update_group_name_information(self):
        self._assert_group_mapped(
            'Information storage and processing',
            'info_storage_processing',
        )

    def test_update_group_name_cellular(self):
        self._assert_group_mapped(
            'Cellular processes and signaling',
            'cellular_processes_signaling',
        )

    def test_update_group_name_metabolism(self):
        self._assert_group_mapped('Metabolism', 'metabolism')

    def test_update_group_name_poorly(self):
        self._assert_group_mapped('Poorly characterized', 'poorly_characterized')
backend/metagenedb/common/utils/parsers/base.py
View file @
793e61eb
...
...
@@ -9,7 +9,8 @@ class FileParser:
"""
for
line
in
file_handler
:
print
(
line
.
rstrip
())
return
None
def
parse
(
self
):
with
open
(
self
.
file_path
,
'r'
)
as
file
:
self
.
handle_parsing
(
file
)
return
self
.
handle_parsing
(
file
)
backend/metagenedb/common/utils/parsers/eggnog.py
View file @
793e61eb
...
...
@@ -39,7 +39,7 @@ class EggNOGFunctionalCategoriesParser(FileParser):
functional_categories
.
append
({
'category_id'
:
elements
[
0
][
1
],
'name'
:
elements
[
1
],
'group'
:
current_group
'group'
:
current_group
.
capitalize
()
})
elif
line
:
# It is a group of a category
current_group
=
line
...
...
backend/metagenedb/common/utils/parsers/test_eggnog.py
View file @
793e61eb
...
...
@@ -63,9 +63,9 @@ class TestEggNOGFunctionalCategoriesParser(TestCase):
"SECOND GROUP
\n
"
,
" [C] Categorie name C
\n
"
,
" [D] Categorie name D
\n
"
,
]
expected_list
=
[
{
'category_id'
:
'A'
,
'group'
:
'F
IRST GROUP
'
,
'name'
:
'Categorie name A'
},
{
'category_id'
:
'B'
,
'group'
:
'F
IRST GROUP
'
,
'name'
:
'Categorie name B'
},
{
'category_id'
:
'C'
,
'group'
:
'S
ECOND GROUP
'
,
'name'
:
'Categorie name C'
},
{
'category_id'
:
'D'
,
'group'
:
'S
ECOND GROUP
'
,
'name'
:
'Categorie name D'
}
{
'category_id'
:
'A'
,
'group'
:
'F
irst group
'
,
'name'
:
'Categorie name A'
},
{
'category_id'
:
'B'
,
'group'
:
'F
irst group
'
,
'name'
:
'Categorie name B'
},
{
'category_id'
:
'C'
,
'group'
:
'S
econd group
'
,
'name'
:
'Categorie name C'
},
{
'category_id'
:
'D'
,
'group'
:
'S
econd group
'
,
'name'
:
'Categorie name D'
}
]
self
.
assertListEqual
(
parser
.
handle_parsing
(
fake_file_handler
),
expected_list
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment