Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
09458ad3
Commit
09458ad3
authored
Dec 09, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Add script to load EggNog into the db
parent
7f863a0d
Changes
9
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/admin/function.py
View file @
09458ad3
...
...
@@ -20,7 +20,7 @@ class FunctionAdmin(admin.ModelAdmin):
@
admin
.
register
(
EggNog
)
class
EggNogAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'function_id'
,
'name'
,
'
long_name
'
)
list_display
=
(
'function_id'
,
'name'
,
'
functional_category
'
)
search_fields
=
(
'function_id'
,
'name'
)
...
...
backend/metagenedb/apps/catalog/management/commands/create_update_eggnog.py
0 → 100644
View file @
09458ad3
import
logging
from
django.core.management.base
import
BaseCommand
from
django.db
import
IntegrityError
from
metagenedb.apps.catalog.models
import
EggNog
,
EggNogFunctionalCategory
from
metagenedb.common.utils.chunks
import
file_len
from
metagenedb.common.utils.parsers
import
EggNogAnnotationLineParser
# Configure the root logging format: timestamp, level, logger name, message.
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
# Module-level logger; its level is adjusted by Command.set_logger_level().
logger = logging.getLogger(__name__)
class ImportEggNog(object):
    """Create or update ``EggNog`` entries from an EggNog annotation file.

    Every line of the file is parsed with
    ``EggNogAnnotationLineParser.get_dict``, its functional category key is
    replaced by the matching ``EggNogFunctionalCategory`` object and the
    resulting payload is saved. The outcome is tracked in
    ``processed_count``, ``created_count``, ``updated_count``,
    ``skipped_count`` and ``skipped_ids``.
    """

    def __init__(self, file_path):
        """Store the path of the annotation file and reset all counters."""
        self.annotation_file = file_path
        self.eggnog_parser = EggNogAnnotationLineParser()
        self.processed_count = 0
        self.created_count = 0
        self.updated_count = 0
        self.skipped_count = 0
        self.skipped_ids = []
        # Defined here so that helper methods never hit an AttributeError
        # when they are used before load_all(); load_all() overwrites both.
        self.functional_cat = {}
        self.total_eggnog_nb = 0

    def _build_functional_category_dict(self):
        """Cache every functional category, keyed by its ``category_id``."""
        all_categories = EggNogFunctionalCategory.objects.all()
        self.functional_cat = {cat.category_id: cat for cat in all_categories}

    def link_functional_category(self, eggnog_dict):
        """Replace the category key of ``eggnog_dict`` by the cached object.

        The dict is modified in place. A missing ``'functional_category'``
        key falls back to ``'S'``; a key that is absent from the cache
        results in ``None``.
        """
        cat_key = eggnog_dict.get('functional_category', 'S')
        category = self.functional_cat.get(cat_key)
        eggnog_dict.update({'functional_category': category})

    def _update_existing(self, payload):
        """Update the stored entry sharing ``function_id`` with ``payload``.

        The entry is counted as skipped when the update violates a database
        constraint, or when no entry with that ``function_id`` exists (the
        insert was then rejected for a reason other than a duplicate id).
        """
        function_id = payload.get('function_id')
        try:
            eggnog = EggNog.objects.get(function_id=function_id)
            for k, v in payload.items():
                setattr(eggnog, k, v)
            eggnog.save()
        except (IntegrityError, EggNog.DoesNotExist):
            self.skipped_ids.append(function_id)
            self.skipped_count += 1
        else:
            self.updated_count += 1

    def _create_or_update(self, payload):
        """Insert ``payload`` as a new entry, falling back to an update."""
        try:
            EggNog(**payload).save()
        except IntegrityError:
            self._update_existing(payload)
        else:
            self.created_count += 1

    def load_all(self, test=False):
        """Load every line of the annotation file into the database.

        Progress is logged every 1000 processed lines. When ``test`` is
        true the import stops at the first of these checkpoints.
        """
        self._build_functional_category_dict()
        self.total_eggnog_nb = file_len(self.annotation_file)
        with open(self.annotation_file, "r") as handle:
            for line in handle:
                eggnog_dict = self.eggnog_parser.get_dict(line)
                self.link_functional_category(eggnog_dict)
                # Empty strings are dropped so that model defaults apply.
                payload = {k: v for k, v in eggnog_dict.items() if v != ""}
                self._create_or_update(payload)
                self.processed_count += 1
                if self.processed_count % 1000 == 0:
                    logger.info("%s/%s EggNog processed so far...",
                                self.processed_count, self.total_eggnog_nb)
                    if test:
                        break
        logger.info("[DONE] %s/%s EggNog created.",
                    self.created_count, self.total_eggnog_nb)
        logger.info("[DONE] %s/%s EggNog updated.",
                    self.updated_count, self.total_eggnog_nb)
        logger.info("[DONE] %s/%s EggNog skipped. List: %s",
                    self.skipped_count, self.total_eggnog_nb, self.skipped_ids)
class Command(BaseCommand):
    """Management command that imports EggNog annotations into the database."""

    help = 'Create or update all Eggnog entries from annotations.tsv file.'

    def add_arguments(self, parser):
        """Register the annotation file argument and the ``--test`` flag."""
        parser.add_argument(
            'annotation',
            help='annotations.tsv file from EggNog',
        )
        parser.add_argument(
            '--test',
            action='store_true',
            help='Run only on first 1000 entries.',
        )

    def set_logger_level(self, verbosity):
        """Map verbosity to a log level: above 2 is DEBUG, above 1 is INFO.

        Lower verbosity values leave the logger level untouched.
        """
        if verbosity > 2:
            level = logging.DEBUG
        elif verbosity > 1:
            level = logging.INFO
        else:
            return
        logger.setLevel(level)

    def handle(self, *args, **options):
        """Set the log level, then run the import on the given file."""
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        importer = ImportEggNog(options['annotation'])
        importer.load_all(test=options['test'])
backend/metagenedb/apps/catalog/migrations/0014_remove_eggnog_long_name.py
0 → 100644
View file @
09458ad3
# Generated by Django 3.0 on 2019-12-09 17:02
from
django.db
import
migrations
class Migration(migrations.Migration):
    """Remove the ``long_name`` field from the ``eggnog`` model."""

    # Must be applied after migration 0013_plural_eggnog of the catalog app.
    dependencies = [
        ('catalog', '0013_plural_eggnog'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='eggnog',
            name='long_name',
        ),
    ]
backend/metagenedb/apps/catalog/migrations/0015_increase_function_name_max_length.py
0 → 100644
View file @
09458ad3
# Generated by Django 3.0 on 2019-12-09 17:06
from
django.db
import
migrations
,
models
class Migration(migrations.Migration):
    """Set ``max_length`` of the ``name`` field of ``function`` to 200."""

    # Must be applied after migration 0014_remove_eggnog_long_name.
    dependencies = [
        ('catalog', '0014_remove_eggnog_long_name'),
    ]

    operations = [
        migrations.AlterField(
            model_name='function',
            name='name',
            field=models.CharField(max_length=200),
        ),
    ]
backend/metagenedb/apps/catalog/models/function.py
View file @
09458ad3
...
...
@@ -12,7 +12,7 @@ class Function(models.Model):
]
function_id
=
models
.
CharField
(
max_length
=
100
,
db_index
=
True
,
unique
=
True
)
name
=
models
.
CharField
(
max_length
=
1
00
)
name
=
models
.
CharField
(
max_length
=
2
00
)
source
=
models
.
CharField
(
max_length
=
10
,
choices
=
SOURCE_CHOICES
,
default
=
UNDEFINED
)
def
__str__
(
self
):
...
...
@@ -38,7 +38,6 @@ class KeggOrthology(Function):
class
EggNog
(
Function
):
SOURCE
=
'eggnog'
long_name
=
models
.
CharField
(
max_length
=
500
)
functional_category
=
models
.
ForeignKey
(
'EggNogFunctionalCategory'
,
related_name
=
'eggnogs'
,
on_delete
=
models
.
SET_NULL
,
...
...
@@ -64,5 +63,8 @@ class EggNogFunctionalCategory(models.Model):
name
=
models
.
CharField
(
max_length
=
100
)
group
=
models
.
CharField
(
max_length
=
100
,
choices
=
GROUP_CHOICES
)
def
__str__
(
self
):
return
f
"
{
self
.
category_id
}
(
{
self
.
name
}
)"
class
Meta
:
verbose_name_plural
=
"EggNog Functional categories"
backend/metagenedb/common/utils/chunks.py
View file @
09458ad3
...
...
@@ -2,3 +2,10 @@ def generate_chunks(full_list, chunk_size):
"""Yield successive n-sized chunks from full_list."""
for
i
in
range
(
0
,
len
(
full_list
),
chunk_size
):
yield
full_list
[
i
:
i
+
chunk_size
]
def file_len(file_path):
    """Return the number of lines in the file located at ``file_path``.

    A final line without a trailing newline is counted, and an empty file
    yields 0 (the previous enumerate-based loop left its counter unbound in
    that case and raised ``UnboundLocalError``).
    """
    with open(file_path) as f:
        # Iterating the handle streams the file line by line.
        return sum(1 for _ in f)
backend/metagenedb/common/utils/parsers/eggnog.py
View file @
09458ad3
...
...
@@ -6,7 +6,7 @@ _LOGGER = logging.getLogger(__name__)
class
EggNogAnnotationLineParser
(
object
):
@
staticmethod
def
ko_lis
t
(
line
):
def
get_dic
t
(
line
):
"""
Parse line from Eggnog annotations.tsv file to return organized dict
"""
...
...
backend/metagenedb/common/utils/parsers/test_eggnog.py
View file @
09458ad3
...
...
@@ -12,10 +12,10 @@ class TestEggNogAnnotationLineParser(TestCase):
'name'
:
"translational termination"
,
'functional_category'
:
"K"
}
test_dict
=
EggNogAnnotationLineParser
.
ko_lis
t
(
ko_line
)
test_dict
=
EggNogAnnotationLineParser
.
get_dic
t
(
ko_line
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_ko_list_wrong_format
(
self
):
ko_line
=
"This is a wrong line format, with; information and tab"
with
self
.
assertRaises
(
Exception
)
as
context
:
# noqa
EggNogAnnotationLineParser
.
ko_lis
t
(
ko_line
)
EggNogAnnotationLineParser
.
get_dic
t
(
ko_line
)
backend/metagenedb/common/utils/test_chunks.py
View file @
09458ad3
from
unittest
import
TestCase
from
metagenedb.common.utils.chunks
import
generate_chunks
from
metagenedb.common.utils.chunks
import
generate_chunks
,
file_len
class
TestChunks
(
TestCase
):
...
...
@@ -24,3 +24,10 @@ class TestChunks(TestCase):
chunks
=
list
(
generate_chunks
(
self
.
full_list
,
chunk_size
))
self
.
assertEqual
(
len
(
chunks
),
1
)
self
.
assertEqual
(
len
(
chunks
[
-
1
]),
10
)
class TestFileLength(TestCase):
    """Check ``file_len`` against a sample file of the development data."""

    def test_file_length(self):
        """The sample annotation summary is expected to hold 1002 lines."""
        expected_line_count = 1002
        observed = file_len("./dev_data/IGC_sample.annotation_OF.summary")
        self.assertEqual(observed, expected_line_count)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment