Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
9ff69a05
Commit
9ff69a05
authored
Aug 27, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
update import scripts
parent
05fddb8e
Changes
2
Hide whitespace changes
Inline
Side-by-side
backend/scripts/populate_db/import_igc_data.py
View file @
9ff69a05
...
...
@@ -27,6 +27,8 @@ class ImportIGCGenes(object):
self
.
url
=
url
self
.
metagenedb_gene_api
=
self
.
METAGENEDB_GENE_API
(
base_url
=
self
.
url
)
self
.
metagenedb_taxonomy_api
=
self
.
METAGENEDB_TAXONOMY_API
(
base_url
=
self
.
url
)
self
.
processed_genes
=
0
self
.
skipped_genes
=
0
# Skip some insertion if specified in script options
self
.
skip_tax
=
skip_tax
self
.
skip_functions
=
skip_functions
...
...
@@ -88,19 +90,20 @@ class ImportIGCGenes(object):
try
:
self
.
_upsert_gene
(
gene_dict_with_taxo
)
except
HTTPError
as
e
:
self
.
skipped_genes
+=
1
_LOGGER
.
warning
(
f
"
{
e
.
response
.
json
()
}
for gene_id:
{
gene_dict
.
get
(
'gene_id'
)
}
. Insertion skipped."
)
def load_annotation_file_to_db_in_chunks(self, chunk_size=100000):
    """Read the annotation file chunk by chunk and push every chunk to the DB.

    Args:
        chunk_size: maximum number of lines of ``self.annotation_file``
            read and handed to ``self._insert_gene_list`` at once.

    The number of lines read is accumulated in ``self.processed_genes``.
    ``self.skipped_genes`` is only reported here; it is not modified by
    this method.
    """
    with open(self.annotation_file, 'r') as annotation_handle:
        # iter(callable, sentinel) stops as soon as islice yields an empty
        # chunk, i.e. when the file is exhausted.
        read_chunk = lambda: list(islice(annotation_handle, chunk_size))  # noqa: E731
        for chunk_genes in iter(read_chunk, []):
            # Single source of truth for the counter: the instance attribute.
            self.processed_genes += len(chunk_genes)
            self._insert_gene_list(chunk_genes)
            _LOGGER.info(f"{self.processed_genes} genes inserted/updated so far...")
    _LOGGER.info(f"[DONE] {self.processed_genes} genes inserted/updated.")
    _LOGGER.info(f"[DONE] {self.skipped_genes} genes skipped.")
def
parse_arguments
():
...
...
backend/scripts/populate_db/load_kegg_ko.py
View file @
9ff69a05
...
...
@@ -4,10 +4,12 @@ import logging
import
os
import
requests
import
sys
from
requests.exceptions
import
HTTPError
import
django
from
django.core.exceptions
import
ValidationError
from
metagenedb.common.utils.api
import
MetageneDBCatalogFunctionAPI
from
metagenedb.common.utils.parsers
import
KEGGLineParser
# Before model import, we need to call django.setup() to load apps
...
...
@@ -27,44 +29,51 @@ def parse_arguments():
Defines parser.
"""
parser
=
argparse
.
ArgumentParser
(
description
=
f
'Populate KEGG KO database from
{
KEGG_KO_LIST_API
}
.'
)
parser
.
add_argument
(
'--url'
,
help
=
'base URL of the instance.'
,
default
=
'http://localhost/'
)
try
:
return
parser
.
parse_args
()
except
SystemExit
:
sys
.
exit
(
1
)
def create_kegg_ko(kegg_ko):
    """Create the KeggOrthology entry described by ``kegg_ko`` or update it.

    Args:
        kegg_ko: mapping of field names to values; its ``function_id`` value
            is used to look up an existing entry.

    The object is validated with ``full_clean()`` before being saved, so any
    exception raised by that validation propagates to the caller.
    """
    function_id = kegg_ko.get('function_id')
    try:
        entry = KeggOrthology.objects.get(function_id=function_id)
    except KeggOrthology.DoesNotExist:
        # No entry yet for this function_id: build a new one from the dict.
        entry = KeggOrthology(**kegg_ko)
    else:
        # Existing entry: overwrite each field with the incoming value.
        for field_name, field_value in kegg_ko.items():
            setattr(entry, field_name, field_value)
    entry.full_clean()
    entry.save()
class ImportKEGGKO(object):
    """Load the KEGG KO list and insert or update every entry through the API.

    The list is downloaded from ``kegg_ko_list_api``, each line is parsed
    with ``KEGGLineParser.ko_list`` and the resulting dict is sent to the
    function API client built from ``METAGENEDB_FUNCTION_API``.
    """

    # API client class instantiated in __init__ with the instance base URL.
    METAGENEDB_FUNCTION_API = MetageneDBCatalogFunctionAPI

    def __init__(self, url, kegg_ko_list_api=KEGG_KO_LIST_API):
        """Set up the API client and the import counters.

        Args:
            url: base URL handed to the function API client.
            kegg_ko_list_api: URL the KEGG KO list is downloaded from.
        """
        self.kegg_ko_list_api = kegg_ko_list_api
        self.metagenedb_function_api = self.METAGENEDB_FUNCTION_API(base_url=url)
        self.total_kegg_nb = 0  # set for real once the list has been downloaded
        self.inserted_kegg = 0
        self.skipped_kegg = 0

    def _upsert_kegg_ko(self, kegg_ko):
        """Update the entry when it already exists, create it otherwise.

        Args:
            kegg_ko: dict describing one KEGG KO, including ``function_id``.

        Raises:
            HTTPError: when the PUT or the POST request itself fails
                (assumes the API client raises it on error responses, as the
                original ``except HTTPError`` implies -- TODO confirm).
        """
        function_id = kegg_ko.get('function_id')
        try:
            # Try to get obj to check if it exists
            self.metagenedb_function_api.get(function_id)
        except HTTPError:
            self.metagenedb_function_api.post(kegg_ko)
        else:
            # Kept out of the try block so that a failing update is reported
            # as such instead of triggering a POST of an existing entry.
            self.metagenedb_function_api.put(function_id, kegg_ko)

    def load_all_kegg_ko(self):
        """Download the KEGG KO list and upsert every entry.

        Entries rejected with an ``HTTPError`` (or a ``ValidationError``) are
        logged and counted in ``self.skipped_kegg``; the others are counted
        in ``self.inserted_kegg``.

        Raises:
            HTTPError: when downloading the KEGG KO list fails.
        """
        all_ko = requests.get(self.kegg_ko_list_api)
        all_ko.raise_for_status()
        ko_lines = all_ko.text.splitlines()  # split once, reused below
        self.total_kegg_nb = len(ko_lines)
        for line in ko_lines:
            kegg_ko = KEGGLineParser.ko_list(line)
            try:
                self._upsert_kegg_ko(kegg_ko)
            except (HTTPError, ValidationError) as e:
                # The upsert goes through the HTTP API, so failures surface
                # as HTTPError; ValidationError is kept for compatibility.
                self.skipped_kegg += 1
                _LOGGER.warning(f"{e.__dict__} for function_id: {kegg_ko.get('function_id')}. Insertion skipped.")
            else:
                self.inserted_kegg += 1
                # Only report progress right after a successful insertion so
                # the same count is not logged again after a skip.
                if self.inserted_kegg % 100 == 0:
                    _LOGGER.info(f"{self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted so far...")
        _LOGGER.info(f"[DONE] {self.inserted_kegg}/{self.total_kegg_nb} KEGG KO inserted.")
        _LOGGER.info(f"[DONE] {self.skipped_kegg}/{self.total_kegg_nb} KEGG KO skipped.")
def run():
    """Script entry point: parse the command line and import all KEGG KO."""
    cli_args = parse_arguments()
    # Build the importer against the requested instance and run it at once.
    ImportKEGGKO(cli_args.url).load_all_kegg_ko()
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment