Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
f5da1cca
Commit
f5da1cca
authored
Aug 05, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Add taxonomy to Gene model
parent
25377fe4
Pipeline
#13540
passed with stages
in 2 minutes and 3 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/admin/gene.py
View file @
f5da1cca
...
...
@@ -6,9 +6,17 @@ from metagenedb.apps.catalog.models import Gene
@
admin
.
register
(
Gene
)
class
GeneAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'gene_id'
,
'gene_length'
,
'get_functions'
)
list_display
=
(
'gene_id'
,
'gene_length'
,
'get_functions'
,
'get_taxonomy'
)
search_fields
=
(
'gene_id'
,)
def
get_functions
(
self
,
obj
):
return
","
.
join
([
str
(
f
)
for
f
in
obj
.
functions
.
all
()])
if
obj
.
functions
.
all
():
return
","
.
join
([
str
(
f
)
for
f
in
obj
.
functions
.
all
()])
return
'-'
get_functions
.
short_description
=
'Functions'
def get_taxonomy(self, obj):
    """Render the gene's taxonomy for the admin change list.

    Returns the taxonomy's string form followed by its rank in
    parentheses, or '-' when the gene has no taxonomy attached.
    """
    taxonomy = obj.taxonomy
    if not taxonomy:
        return '-'
    return f"{taxonomy} ({taxonomy.rank})"
# Column header shown by the Django admin for this computed field.
get_taxonomy.short_description = 'Taxonomy'
backend/metagenedb/apps/catalog/migrations/0006_gene_taxonomy.py
0 → 100644
View file @
f5da1cca
# Generated by Django 2.2.1 on 2019-08-05 13:45
from
django.db
import
migrations
,
models
import
django.db.models.deletion
class Migration(migrations.Migration):
    """Add the optional ``taxonomy`` foreign key to the ``gene`` model."""

    dependencies = [('catalog', '0005_gene_ordering')]

    operations = [
        migrations.AddField(
            model_name='gene',
            name='taxonomy',
            # Nullable link to catalog.Taxonomy, reachable from the other
            # side through the 'genes' reverse accessor; SET_NULL is the
            # on_delete policy.
            field=models.ForeignKey(
                to='catalog.Taxonomy',
                related_name='genes',
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]
backend/metagenedb/apps/catalog/models/gene.py
View file @
f5da1cca
...
...
@@ -7,6 +7,11 @@ class Gene(models.Model):
gene_id
=
models
.
CharField
(
max_length
=
100
,
unique
=
True
,
db_index
=
True
)
gene_length
=
models
.
IntegerField
()
functions
=
models
.
ManyToManyField
(
Function
)
taxonomy
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'genes'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
)
def
__str__
(
self
):
return
self
.
gene_id
...
...
backend/metagenedb/apps/catalog/serializers/gene.py
View file @
f5da1cca
from
rest_framework
import
serializers
from
metagenedb.apps.catalog.models
import
Gene
from
metagenedb.apps.catalog.models
import
Gene
,
Taxonomy
from
metagenedb.apps.catalog.serializers
import
FunctionSerializer
class
GeneSerializer
(
serializers
.
ModelSerializer
):
functions
=
FunctionSerializer
(
many
=
True
,
read_only
=
True
)
taxonomy
=
serializers
.
SlugRelatedField
(
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
,
)
class
Meta
:
model
=
Gene
fields
=
(
'gene_id'
,
'gene_length'
,
'functions'
)
fields
=
(
'gene_id'
,
'gene_length'
,
'functions'
,
'taxonomy'
)
backend/scripts/populate_db/import_igc_data.py
View file @
f5da1cca
...
...
@@ -14,13 +14,15 @@ from metagenedb.common.utils.parsers import IGCLineParser
os
.
environ
.
setdefault
(
"DJANGO_SETTINGS_MODULE"
,
"metagenedb.settings"
)
django
.
setup
()
from
metagenedb.apps.catalog.models
import
Gene
,
Function
# noqa
from
metagenedb.apps.catalog.models
import
Gene
,
Function
,
Taxonomy
# noqa
from
metagenedb.apps.catalog.serializers
import
GeneSerializer
# noqa
logging
.
basicConfig
(
level
=
logging
.
INFO
)
_LOGGER
=
logging
.
getLogger
(
__name__
)
SELECTED_KEYS
=
[
'gene_id'
,
'gene_length'
,
'kegg_ko'
]
PHYLUM_COL
=
'taxo_phylum'
GENUS_COL
=
'taxo_genus'
SELECTED_KEYS
=
[
'gene_id'
,
'gene_length'
,
'kegg_ko'
,
PHYLUM_COL
,
GENUS_COL
]
def
parse_gene
(
raw_line
,
selected_keys
=
SELECTED_KEYS
):
...
...
@@ -33,6 +35,30 @@ def parse_gene(raw_line, selected_keys=SELECTED_KEYS):
return
selected_dict
def _find_tax_id(name, rank):
    """Return the tax_id of the first Taxonomy matching name and rank, or None."""
    # Fetch at most two rows: enough to detect both "no match" and
    # "ambiguous match" with a single query.
    matches = list(Taxonomy.objects.filter(name=name, rank=rank)[:2])
    if not matches:
        _LOGGER.warning(f"No result found for {rank} {name}.")
        return None
    if len(matches) > 1:
        _LOGGER.warning(f"More than 1 result found for {rank} {name}. First result is kept.")
    return matches[0].tax_id


def select_taxonomy(gene_dict, unknown_val='unknown'):
    """
    Select the taxonomy to be assigned for the gene.

    The phylum and genus columns are always removed from gene_dict.
    Genus has priority over phylum: the first rank whose value is not
    unknown_val and that matches an entry in the Taxonomy table provides
    the 'taxonomy' key (set to that entry's tax_id). If both are unknown,
    or no matching entry exists, no 'taxonomy' key is added.

    :param gene_dict: parsed gene values; modified in place.
    :param unknown_val: value marking a taxonomy column as not assigned.
    :return: the same gene_dict, without the raw taxonomy columns.
    """
    # A missing column is treated like an unknown value instead of raising.
    phylum = gene_dict.pop(PHYLUM_COL, unknown_val)
    genus = gene_dict.pop(GENUS_COL, unknown_val)
    for name, rank in ((genus, "genus"), (phylum, "phylum")):
        if name == unknown_val:
            continue
        tax_id = _find_tax_id(name, rank)
        if tax_id is not None:
            gene_dict['taxonomy'] = tax_id
            break
        # No entry for this rank: fall through to the next, less specific one.
    return gene_dict
def
upsert_gene
(
gene_dict
):
try
:
gene_obj
=
Gene
.
objects
.
get
(
gene_id
=
gene_dict
.
get
(
'gene_id'
))
...
...
@@ -46,8 +72,9 @@ def upsert_gene(gene_dict):
def
insert_gene_list
(
chunk_genes
):
for
gene_line
in
chunk_genes
:
gene_dict
=
parse_gene
(
gene_line
)
gene_dict_with_taxo
=
select_taxonomy
(
gene_dict
)
try
:
upsert_gene
(
gene_dict
)
upsert_gene
(
gene_dict
_with_taxo
)
except
ValidationError
as
e
:
_LOGGER
.
warning
(
f
"
{
e
.
__dict__
}
for gene_id:
{
gene_dict
.
get
(
'gene_id'
)
}
. Insertion skipped."
)
...
...
backend/scripts/populate_db/test_import_igc_data.py
View file @
f5da1cca
...
...
@@ -4,7 +4,7 @@ from rest_framework.exceptions import ValidationError
from
rest_framework.test
import
APITestCase
from
metagenedb.apps.catalog.models
import
Gene
from
scripts.populate_db.import_igc_data
import
parse_gene
,
upsert_gene
from
scripts.populate_db.import_igc_data
import
parse_gene
,
upsert_gene
,
select_taxonomy
class
TestParseGene
(
TestCase
):
...
...
@@ -35,7 +35,9 @@ class TestParseGene(TestCase):
expected_dict
=
{
'gene_id'
:
'gene_name'
,
'gene_length'
:
'gene_length'
,
'kegg_ko'
:
'kegg'
'kegg_ko'
:
'kegg'
,
'taxo_phylum'
:
'taxo_phylum'
,
'taxo_genus'
:
'taxo_genus'
,
}
tested_dict
=
parse_gene
(
self
.
raw_line
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
...
...
@@ -96,3 +98,29 @@ class TestUpsertGene(APITestCase):
self
.
assertEqual
(
Gene
.
objects
.
get
(
gene_id
=
"test_gene01"
).
gene_length
,
3556
)
upsert_gene
(
updated_gene
)
self
.
assertEqual
(
Gene
.
objects
.
get
(
gene_id
=
"test_gene01"
).
gene_length
,
356
)
class TestSelectTaxonomy(TestCase):
    """Checks for select_taxonomy; the lookup-based cases are still placeholders."""

    def test_genus_only(self):
        # @TODO with #31
        pass

    def test_phylum_only(self):
        # @TODO with #31
        pass

    def test_genus_phylum(self):
        # @TODO with #31
        pass

    def test_both_unknown(self):
        # With both columns 'unknown', the raw taxonomy keys are dropped and
        # no 'taxonomy' key is added.
        expected_dict = {'gene_id': 'gene', 'gene_length': 135}
        gene_dict = dict(expected_dict, taxo_phylum='unknown', taxo_genus='unknown')
        self.assertDictEqual(select_taxonomy(gene_dict), expected_dict)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment