Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
59aeb54a
Commit
59aeb54a
authored
Jun 11, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
add script to populate DB
parent
45bc993a
Changes
1
Hide whitespace changes
Inline
Side-by-side
web/scripts/import_igc_data.py
0 → 100644
View file @
59aeb54a
import
argparse
import
logging
import
os
import
sys
from
itertools
import
islice
import
django
from
django.core.exceptions
import
ValidationError
# Before importing any model, we need to call django.setup() to load the apps.
# The settings module is only set if DJANGO_SETTINGS_MODULE is not already
# defined in the environment (setdefault), so an external override still wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()

# This import is deliberately placed after django.setup(); do not move it up
# with the other imports.
from metagenedb.apps.catalog.models import Gene

# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
def create_gene(raw_line):
    """Build an unsaved ``Gene`` from one tab-separated annotation line.

    Only the end-of-line characters are stripped before splitting, so empty
    trailing columns are preserved and the column positions stay stable.

    Args:
        raw_line: One raw line of the annotation file, with or without its
            trailing newline.

    Returns:
        A ``Gene`` instance that has been neither validated nor saved.

    Raises:
        IndexError: If the line has fewer than seven tab-separated columns.
    """
    # A bare rstrip() would also remove trailing tabs, silently dropping
    # empty trailing columns; strip the line terminator only.
    gene_info = raw_line.rstrip('\r\n').split('\t')
    return Gene(
        gene_id=gene_info[1],
        gene_length=gene_info[2],
        taxonomic_genus=gene_info[6],
        taxonomic_phylum=gene_info[5],
    )
def insert_gene(gene):
    """Validate ``gene`` and then persist it.

    Args:
        gene: Object exposing ``full_clean()`` and ``save()``.

    Whatever ``full_clean()`` raises propagates to the caller, in which case
    ``save()`` is never reached.
    """
    record = gene
    # Validation must come first so that nothing invalid gets saved.
    record.full_clean()
    record.save()
def insert_gene_list(chunk_genes):
    """Create and insert one gene per raw annotation line, skipping bad ones.

    Args:
        chunk_genes: Iterable of raw tab-separated annotation lines.

    Lines that are too short to be parsed and genes that fail validation are
    logged as warnings and skipped; processing continues with the next line.
    """
    for raw_line in chunk_genes:
        try:
            gene = create_gene(raw_line)
        except IndexError:
            # A blank or truncated line must not abort the whole import.
            _LOGGER.warning("Malformed line %r. Insertion skipped.", raw_line)
            continue
        # Keep the try body limited to the call that performs validation.
        try:
            insert_gene(gene)
        except ValidationError as e:
            _LOGGER.warning(f"{e.__dict__} for gene_id: {gene.gene_id}. Insertion skipped.")
def load_annotation_file_to_db_in_chunks(annotation_file, chunk_size=100000):
    """Read an annotation file and insert its lines chunk by chunk.

    Args:
        annotation_file: Path of the annotation file to read.
        chunk_size: Maximum number of lines handed to ``insert_gene_list``
            per call.

    A progress message is logged after every chunk and a final message once
    the file is exhausted. The counter reflects lines read, whether or not
    their insertion succeeded.
    """
    processed = 0
    with open(annotation_file, 'r') as handle:
        # Two-argument iter(): keep pulling chunks until islice yields an
        # empty list, which means the end of the file has been reached.
        for batch in iter(lambda: list(islice(handle, chunk_size)), []):
            processed += len(batch)
            insert_gene_list(batch)
            _LOGGER.info(f"{processed} genes processed so far...")
    _LOGGER.info(f"[DONE] {processed} genes processed.")
def parse_arguments(argv=None):
    """
    Defines parser and parses the command line.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``annotation`` and
        ``delete_all`` attributes.

    Raises:
        SystemExit: With status 0 after ``--help``, or status 1 when the
            arguments are invalid.
    """
    parser = argparse.ArgumentParser(
        description='Populate database from a given IGC annotation file.'
    )
    parser.add_argument('annotation', help='IGC annotation file')
    parser.add_argument(
        '--delete_all',
        action='store_true',
        help='Empty database before insertion.',
    )
    try:
        return parser.parse_args(argv)
    except SystemExit as exc:
        # A clean exit (e.g. after --help) must not be reported as a failure.
        if exc.code in (0, None):
            raise
        # Usage errors keep the script's historical exit status of 1.
        sys.exit(1)
def run():
    """Script entry point: parse the arguments, then load the annotation file.

    When ``--delete_all`` is given, every existing ``Gene`` row is deleted
    before the file is loaded.
    """
    options = parse_arguments()
    wipe_first = options.delete_all
    if wipe_first:
        Gene.objects.all().delete()
    annotation_path = options.annotation
    load_annotation_file_to_db_in_chunks(annotation_path)
# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    run()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment