Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
0f854f02
Commit
0f854f02
authored
Aug 01, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Replace custom insertion_model by serializers
parent
a569de25
Changes
9
Hide whitespace changes
Inline
Side-by-side
backend/dev_data/IGC_sample.annotation_OF.summary
View file @
0f854f02
...
...
@@ -999,3 +999,4 @@
999 158499257-stool1_revised_C1458534_1_gene127873 11955 Complete USA unknown unknown unknown NOG295308 0.00315706393054459 0.00280373831775701 unknown unknown USA
1000 MH0385_GL0059251 11946 Lack both ends EUR unknown unknown unknown unknown 0.000789265982636148 0.000934579439252336 unknown unknown EUR
1000 MH0385_GL0059251 11946 Lack both ends EUR unknown unknown unknown unknown 0.000789265982636148 0.000934579439252336 unknown unknown EUR
353535 wrong_length the_length Info EUR unknown unknown unknown unknown 0.0000001 0.00000001 0.0000001 unknown unknown EUR
backend/metagenedb/apps/catalog/migrations/0003_
auto_20190717_1551
.py
→
backend/metagenedb/apps/catalog/migrations/0003_
complete_taxonomy
.py
View file @
0f854f02
File moved
backend/metagenedb/apps/catalog/migrations/0004_taxonomy_superkingdom.py
View file @
0f854f02
...
...
@@ -7,7 +7,7 @@ import django.db.models.deletion
class
Migration
(
migrations
.
Migration
):
dependencies
=
[
(
'catalog'
,
'0003_
auto_20190717_1551
'
),
(
'catalog'
,
'0003_
complete_taxonomy
'
),
]
operations
=
[
...
...
backend/metagenedb/apps/catalog/migrations/0005_gene_ordering.py
0 → 100644
View file @
0f854f02
# Generated by Django 2.2.1 on 2019-08-01 14:16
from
django.db
import
migrations
class Migration(migrations.Migration):
    """Alter the options of the ``gene`` model to order by ``-gene_id``."""

    # Must be applied after the previous catalog migration.
    dependencies = [
        ('catalog', '0004_taxonomy_superkingdom'),
    ]

    # Only model options change here; no operation in this list alters columns.
    operations = [
        migrations.AlterModelOptions(
            name='gene',
            options={'ordering': ['-gene_id']},
        ),
    ]
backend/metagenedb/apps/catalog/models/gene.py
View file @
0f854f02
...
...
@@ -10,3 +10,6 @@ class Gene(models.Model):
def __str__(self):
    """Return the ``gene_id`` attribute as this object's string form."""
    return self.gene_id
class Meta:
    # Default ordering: by gene_id, descending (leading '-').
    ordering = ['-gene_id']
backend/metagenedb/apps/catalog/views/gene.py
View file @
0f854f02
...
...
@@ -9,30 +9,13 @@ from rest_framework.decorators import (
from
rest_framework.response
import
Response
from
django.core.paginator
import
Paginator
,
EmptyPage
,
PageNotAnInteger
from
metagenedb.apps.catalog.models
import
Function
,
Gene
from
metagenedb.apps.catalog.models
import
Gene
from
metagenedb.apps.catalog.serializers
import
GeneSerializer
from
metagenedb.apps.catalog.views.insertion_model
import
InsertionBase
logging
.
basicConfig
(
level
=
logging
.
INFO
)
_LOGGER
=
logging
.
getLogger
(
__name__
)
class GeneInsertion(InsertionBase):
    """
    Insertion helper bound to the Gene model, looked up by ``gene_id``.

    ``kegg_ko`` is declared as a many-to-many field and is handled by
    ``_link_kegg_ko``.
    """

    MANY_TO_MANY_FIELDS = ['kegg_ko']
    model = Gene
    obj_id = "gene_id"

    def _link_kegg_ko(self, function_id):
        """
        Attach the Function identified by ``function_id`` to the current gene.

        Placeholder values (currently only ``'unknown'``) are ignored. When no
        matching Function exists, a warning is logged and nothing is linked.
        """
        placeholders = ['unknown']
        if function_id in placeholders:
            return
        try:
            matched = Function.objects.get(function_id=function_id)
            self.obj.functions.add(matched)
            self.full_clean_and_save()
        except Function.DoesNotExist:
            _LOGGER.warning(f"{function_id} not found in the database. Full dict: {self.full_dict}.")
@
api_view
([
'GET'
])
@
authentication_classes
(())
@
permission_classes
(())
...
...
backend/metagenedb/apps/catalog/views/insertion_model.py
deleted
100644 → 0
View file @
a569de25
from
abc
import
ABC
from
metagenedb.utils.dict_operations
import
extract_dict
class InsertionBase(ABC):
    """
    Base for insertion in DB for different models.

    This base will be used for POST methods but also direct insertion to DB
    from scripts. Subclasses provide ``model`` and ``obj_id`` and one
    ``_link_<field>`` method per foreign-key / many-to-many field they declare.
    """

    MANY_TO_MANY_FIELDS = []
    FOREIGN_KEY_FIELDS = []
    SIMPLE_FIELDS = []  # Fields you want to be able to create with the class

    @property
    def model(self):
        """Model class to insert into; subclasses must override."""
        raise NotImplementedError

    @property
    def obj_id(self):
        """Name of the field used to look up an existing object; subclasses must override."""
        raise NotImplementedError

    def __init__(self, model_dict):
        """Split ``model_dict`` into foreign-key, many-to-many and simple parts."""
        # Snapshot taken before any extract_dict call touches model_dict.
        self.full_dict = model_dict.copy()
        self.foreign_key_dict = extract_dict(model_dict, self.FOREIGN_KEY_FIELDS)
        self.many_to_many_dict = extract_dict(model_dict, self.MANY_TO_MANY_FIELDS)
        # Without an explicit SIMPLE_FIELDS list, a copy of model_dict as it
        # stands after the extractions above is used as the simple fields.
        self.simple_dict = (
            extract_dict(model_dict, self.SIMPLE_FIELDS)
            if self.SIMPLE_FIELDS
            else model_dict.copy()
        )
        self.obj = None

    def upsert_to_db(self):
        """Update the matching object or create it, save, then run the link handlers."""
        try:
            manager = self.model.objects
            lookup = {self.obj_id: self.full_dict.get(self.obj_id)}
            self.obj = manager.get(**lookup)
            for field_name in self.simple_dict:
                setattr(self.obj, field_name, self.simple_dict[field_name])
        except self.model.DoesNotExist:
            self.create_obj()
        self.full_clean_and_save()
        self.handle_foreign_fields()
        self.handle_many_to_many_fields()

    def create_obj(self):
        """Instantiate (without saving) a new model object from the simple fields."""
        self.obj = self.model(**self.simple_dict)

    def full_clean_and_save(self):
        """Call ``full_clean()`` then ``save()`` on the current object."""
        current = self.obj
        current.full_clean()
        current.save()

    def _run_link_handlers(self, field_values):
        """Call ``self._link_<key>(value)`` for every item of ``field_values``."""
        for field_name, raw_value in field_values.items():
            handler = getattr(self, f"_link_{field_name}")
            handler(raw_value)

    def handle_foreign_fields(self):
        """Dispatch every foreign-key value to its ``_link_<field>`` method."""
        self._run_link_handlers(self.foreign_key_dict)

    def handle_many_to_many_fields(self):
        """Dispatch every many-to-many value to its ``_link_<field>`` method."""
        self._run_link_handlers(self.many_to_many_dict)
backend/scripts/populate_db/import_igc_data.py
View file @
0f854f02
...
...
@@ -6,14 +6,14 @@ import sys
from
itertools
import
islice
import
django
from
django.c
or
e
.exceptions
import
ValidationError
from
rest_framew
or
k
.exceptions
import
ValidationError
# Before importing the models, we need to call django.setup() to load the apps
os
.
environ
.
setdefault
(
"DJANGO_SETTINGS_MODULE"
,
"metagenedb.settings"
)
django
.
setup
()
from
metagenedb.apps.catalog.models
import
Gene
,
Function
# noqa
from
metagenedb.apps.catalog.
views.gene
import
Gene
Insertion
# noqa
from
metagenedb.apps.catalog.
serializers
import
Gene
Serializer
# noqa
logging
.
basicConfig
(
level
=
logging
.
INFO
)
_LOGGER
=
logging
.
getLogger
(
__name__
)
...
...
@@ -46,13 +46,21 @@ def parse_gene(raw_line):
}
def upsert_gene(gene_dict):
    """
    Create or update a Gene from ``gene_dict`` through ``GeneSerializer``.

    The gene is looked up by ``gene_dict['gene_id']``. If it exists, the
    serializer is bound to that instance; otherwise it is built from the data
    alone. Validation uses ``raise_exception=True``, so invalid data raises
    instead of being saved.
    """
    lookup_id = gene_dict.get('gene_id')
    try:
        existing = Gene.objects.get(gene_id=lookup_id)
    except Gene.DoesNotExist:
        serializer = GeneSerializer(data=gene_dict)
    else:
        serializer = GeneSerializer(existing, data=gene_dict)
    serializer.is_valid(raise_exception=True)
    serializer.save()
def insert_gene_list(chunk_genes):
    """
    Parse and upsert every raw gene line of ``chunk_genes``.

    Each line is parsed once with ``parse_gene`` and written once with
    ``upsert_gene``. A ``ValidationError`` raised while upserting is logged as
    a warning and that gene is skipped; the remaining lines are still
    processed.
    """
    for gene_line in chunk_genes:
        # Parse outside the try so gene_dict is always bound when the handler
        # below formats its message.
        gene_dict = parse_gene(gene_line)
        try:
            # Single write path: the legacy GeneInsertion upsert is gone, so a
            # gene is no longer inserted twice per line.
            upsert_gene(gene_dict)
        except ValidationError as e:
            _LOGGER.warning(f"{e.__dict__} for gene_id: {gene_dict.get('gene_id')}. Insertion skipped.")
...
...
backend/scripts/populate_db/test_import_igc_data.py
0 → 100644
View file @
0f854f02
from
unittest
import
TestCase
from
rest_framework.exceptions
import
ValidationError
from
rest_framework.test
import
APITestCase
from
metagenedb.apps.catalog.models
import
Gene
from
scripts.populate_db.import_igc_data
import
parse_gene
,
upsert_gene
class TestParseGene(TestCase):
    """Checks that ``parse_gene`` keeps only the expected columns of a raw line."""

    def test_parse_gene(self):
        """A tab-separated line of column names maps to the three kept keys."""
        columns = (
            'gene_id',
            'gene_name',
            'gene_length',
            'gene_completeness_status',
            'cohort_origin',
            'taxo_phylum',
            'taxo_genus',
            'kegg',
            'eggnog',
            'sample_occurence_freq',
            'ind_occurence_freq',
            'kegg_functional_cat',
            'eggnog_functional_cat',
            'cohort_assembled',
        )
        parsed = parse_gene("\t".join(columns))
        wanted = {
            'gene_id': 'gene_name',  # We use the gene name for our gene ID
            'gene_length': 'gene_length',
            'kegg_ko': 'kegg',
        }
        self.assertDictEqual(parsed, wanted)
class TestUpsertGene(APITestCase):
    """Database-backed tests for ``upsert_gene`` (insert, rejection, update)."""

    def test_insert_valid_gene_no_kegg(self):
        """A valid gene without KEGG information is stored as a single row."""
        valid_gene = {
            'gene_id': 'test_gene01',
            'gene_length': 3556,
        }
        upsert_gene(valid_gene)
        self.assertEqual(Gene.objects.all().count(), 1)

    def test_insert_invalid_gene_length(self):
        """A non-numeric ``gene_length`` is rejected and nothing is stored."""
        invalid_gene = {
            'gene_id': 'test_gene01',
            'gene_length': 'wrong_format',
        }
        # The raised exception itself is not inspected, so it is not bound to
        # a name (the former unused `as context` needed a `# noqa`).
        with self.assertRaises(ValidationError):
            upsert_gene(invalid_gene)
        self.assertEqual(Gene.objects.all().count(), 0)

    def test_update_gene(self):
        """Upserting an existing ``gene_id`` updates it without duplicating it."""
        valid_gene = {
            'gene_id': 'test_gene01',
            'gene_length': 3556,
        }
        updated_gene = {
            'gene_id': 'test_gene01',
            'gene_length': 356,
        }
        upsert_gene(valid_gene)
        self.assertEqual(Gene.objects.get(gene_id="test_gene01").gene_length, 3556)
        upsert_gene(updated_gene)
        self.assertEqual(Gene.objects.get(gene_id="test_gene01").gene_length, 356)
        # The second upsert must have updated the existing row, not added one.
        self.assertEqual(Gene.objects.all().count(), 1)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment