Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
77ae83bb
Commit
77ae83bb
authored
Jan 03, 2020
by
Kenzo-Hugo Hillion
♻
Browse files
update backend with new hierarchy model
parent
112da486
Pipeline
#21282
passed with stages
in 2 minutes and 45 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/api/catalog/views/taxonomy.py
View file @
77ae83bb
from
marshmallow.exceptions
import
ValidationError
from
rest_framework.response
import
Response
from
rest_framework.status
import
HTTP_422_UNPROCESSABLE_ENTITY
from
metagenedb.api.catalog.filters
import
TaxonomyFilter
from
metagenedb.api.catalog.qparams_validators.taxonomy
import
TaxonomyQueryParams
from
metagenedb.apps.catalog.models
import
Taxonomy
...
...
@@ -12,18 +8,8 @@ from .base import BulkViewSet
class
TaxonomyViewSet
(
BulkViewSet
):
queryset
=
Taxonomy
.
objects
.
select_related
(
"parent"
,
"superkingdom"
,
"kingdom"
,
"phylum"
,
"class_rank"
,
"order"
,
"family"
,
"genus"
,
"species"
).
all
()
"parent"
).
all
()
serializer_class
=
TaxonomySerializer
lookup_field
=
'tax_id'
filterset_class
=
TaxonomyFilter
query_params_parser
=
TaxonomyQueryParams
def retrieve(self, request, *args, **kwargs):
    """Return the serialized taxonomy entry selected by the request.

    The query parameters are validated first; when validation fails the
    normalized error messages are returned with HTTP status 422.
    """
    try:
        # Called for validation only: the parsed parameters are not used below.
        _ = self._get_qparams(request.query_params)  # noqa
    except ValidationError as error:
        return Response(
            error.normalized_messages(),
            status=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    taxon = self.get_object()
    # The value is discarded; presumably the property is read for its side
    # effects on the instance before serialization -- TODO confirm.
    _ = taxon.parental_hierarchy  # noqa
    return Response(self.get_serializer(taxon).data)
backend/metagenedb/apps/catalog/admin/taxonomy.py
View file @
77ae83bb
...
...
@@ -3,13 +3,60 @@ from django_admin_listfilter_dropdown.filters import DropdownFilter
from
metagenedb.apps.catalog.models
import
Taxonomy
# Names of the admin accessor columns: one "get_<rank>" entry per taxonomic
# rank, ordered from the broadest rank to the narrowest.
RANK_DISPLAY = [
    "get_" + rank_name
    for rank_name in (
        'superkingdom',
        'kingdom',
        'phylum',
        'class',
        'order',
        'family',
        'genus',
        'species',
    )
]
@
admin
.
register
(
Taxonomy
)
class
TaxonomyAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'tax_id'
,
'name'
,
'rank'
,
'superkingdom'
,
'kingdom'
,
'phylum'
,
'class_rank'
,
'order'
,
'family'
,
'genus'
,
'species'
,
)
'tax_id'
,
'name'
,
'rank'
,
'get_parent'
,
)
+
tuple
(
RANK_DISPLAY
)
list_filter
=
((
'rank'
,
DropdownFilter
),)
search_fields
=
(
'tax_id'
,
'name'
)
def get_parent(self, obj):
    """Render the parent of ``obj`` as "<name> (<rank>)" for the list view.

    Returns '-' when the entry has no parent.
    """
    parent = obj.parent
    if not parent:
        return '-'
    return f"{parent.name} ({parent.rank})"


# Column header shown for this accessor.
get_parent.short_description = 'Parent'
def _get_taxonomy(self, obj, rank):
    """Return the name stored for ``rank`` in ``obj.hierarchy``, or '-'.

    '-' is returned when the entry has no (or an empty) hierarchy, when the
    hierarchy has no entry (or a null entry) for ``rank``, or when that
    entry carries no 'name' key.
    """
    # Single lookup: an empty/None hierarchy behaves like "rank not present".
    entry = (obj.hierarchy or {}).get(rank)
    if entry is None:
        return '-'
    return "{}".format(entry.get('name', '-'))
def _rank_column(rank):
    """Build the admin list column that shows the ``rank`` name of an entry.

    The returned function takes ``(self, obj)`` like any other admin display
    callable, delegates to ``self._get_taxonomy(obj, rank)`` and carries the
    capitalized rank as its ``short_description`` (the column header).
    """
    def column(self, obj):
        return self._get_taxonomy(obj, rank)

    column.__name__ = f"get_{rank}"
    column.short_description = rank.capitalize()
    return column


# One accessor per taxonomic rank; they only differ by the rank they look up.
get_superkingdom = _rank_column('superkingdom')
get_kingdom = _rank_column('kingdom')
get_phylum = _rank_column('phylum')
get_class = _rank_column('class')
get_order = _rank_column('order')
get_family = _rank_column('family')
get_genus = _rank_column('genus')
get_species = _rank_column('species')
backend/metagenedb/apps/catalog/management/commands/build_hierarchy.py
View file @
77ae83bb
...
...
@@ -4,6 +4,8 @@ from django.core.management.base import BaseCommand
from
metagenedb.apps.catalog.models
import
Taxonomy
from
metagenedb.common.utils.profiling
import
profile
logging
.
basicConfig
(
format
=
'[%(asctime)s] %(levelname)s:%(name)s:%(message)s'
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -19,11 +21,12 @@ class HierarchyBuilder:
self
.
hierarchy_built
=
0
self
.
hierarchy_failed
=
0
@
profile
(
'/Users/khillion/Sandbox/tax_only_many_parents.prof'
)
def
build_all
(
self
,
chunk_size
=
8000
,
test
=
False
):
logger
.
info
(
"Building all hierarchy for all %s taxonomy items..."
,
self
.
total_tax
)
for
taxonomy
in
self
.
queryset
.
iterator
(
chunk_size
=
chunk_size
):
try
:
hierarchy
=
taxonomy
.
parental
_hierarchy
# noqa
hierarchy
=
taxonomy
.
build
_hierarchy
()
# noqa
self
.
hierarchy_built
+=
1
except
Exception
:
self
.
hierarchy_failed
+=
1
...
...
@@ -49,10 +52,7 @@ class Command(BaseCommand):
logger
.
setLevel
(
logging
.
INFO
)
def
get_queryset
(
self
):
return
Taxonomy
.
objects
.
select_related
(
SELECT_RELATED_PARENT
,
"superkingdom"
,
"kingdom"
,
"phylum"
,
"class_rank"
,
"order"
,
"family"
,
"genus"
,
"species"
).
all
()
return
Taxonomy
.
objects
.
select_related
(
SELECT_RELATED_PARENT
).
all
()
def
handle
(
self
,
*
args
,
**
options
):
self
.
set_logger_level
(
int
(
options
[
'verbosity'
]))
...
...
backend/metagenedb/apps/catalog/migrations/0022_group_hierarchy.py
0 → 100644
View file @
77ae83bb
# Generated by Django 3.0.1 on 2020-01-02 16:42
import
django.contrib.postgres.fields.jsonb
from
django.db
import
migrations
class Migration(migrations.Migration):
    """Swap the per-rank fields of ``taxonomy`` for one ``hierarchy`` JSON field.

    The eight rank fields are removed and a nullable ``hierarchy`` field is
    added; this migration does not copy any data between them.
    """

    dependencies = [('catalog', '0021_index_gene_length')]

    operations = [
        migrations.RemoveField(model_name='taxonomy', name=rank_field)
        for rank_field in (
            'class_rank',
            'family',
            'genus',
            'kingdom',
            'order',
            'phylum',
            'species',
            'superkingdom',
        )
    ] + [
        migrations.AddField(
            model_name='taxonomy',
            name='hierarchy',
            field=django.contrib.postgres.fields.jsonb.JSONField(null=True),
        ),
    ]
backend/metagenedb/apps/catalog/models/taxonomy.py
View file @
77ae83bb
from
django.db
import
models
from
django.contrib.postgres.fields
import
JSONField
class
Taxonomy
(
models
.
Model
):
...
...
@@ -51,81 +52,23 @@ class Taxonomy(models.Model):
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
superkingdom
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'superkingdom_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
kingdom
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'kingdom_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
phylum
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'phylum_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
class_rank
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'class_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
verbose_name
=
"class"
)
order
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'order_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
family
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'family_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
genus
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'genus_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
species
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'species_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
hierarchy
=
JSONField
(
null
=
True
)
def __str__(self):
    """Display an entry by its name."""
    return "{}".format(self.name)
@property
def parental_hierarchy(self):
    """Parental hierarchy of the entry.

    Read from the rank attributes when ``kingdom`` or ``superkingdom`` is
    already set on the entry, built from the parents otherwise.
    """
    if self.kingdom is not None or self.superkingdom is not None:
        return self._dict_parental_hierarchy()
    return self._build_parental_hierarchy()
def _dict_parental_hierarchy(self):
    """
    Return parental hierarchy from the rank attributes set on the entry.

    The result maps each rank name ("superkingdom" ... "species", with
    "class_rank" for the class) to the value of the attribute of the same
    name. Ranks whose attribute is missing or None are left out; keys follow
    the order of the ranks, from superkingdom down to species.
    """
    ranks = (
        "superkingdom", "kingdom", "phylum", "class_rank",
        "order", "family", "genus", "species",
    )
    # Read every attribute once, then drop the ranks that are not set.
    candidates = ((rank, getattr(self, rank, None)) for rank in ranks)
    return {rank: taxon for rank, taxon in candidates if taxon is not None}
def
_build_parental_hierarchy
(
self
):
def build_hierarchy(self):
    """
    Build and save parental hierarchy for an entry

    The hierarchy maps a rank to ``{'tax_id': ..., 'name': ...}`` for the
    entry itself and for its ancestors. It is stored on ``self.hierarchy``,
    saved, and returned. An entry named 'root' or without parent gets an
    empty hierarchy.

    The hierarchy already stored on the parent is reused when there is one;
    the parent is only (recursively) built and saved when it has none yet.
    """
    hierarchy = {}
    if self.name != 'root' and self.parent is not None:
        hierarchy[self.rank] = {
            'tax_id': self.tax_id,
            'name': self.name
        }
        # Do not pass build_hierarchy() as the getattr default: a default is
        # evaluated eagerly, which rebuilt and re-saved every ancestor on
        # each call even when their hierarchy was already stored.
        parent_hierarchy = getattr(self.parent, 'hierarchy', None)
        if parent_hierarchy is None:
            parent_hierarchy = self.parent.build_hierarchy()
        # Parent entries are merged last, so they win when a rank is shared.
        hierarchy = {**hierarchy, **parent_hierarchy}
    self.hierarchy = hierarchy
    self.save()
    return hierarchy
...
...
backend/metagenedb/apps/catalog/models/test_taxonomy.py
View file @
77ae83bb
...
...
@@ -5,37 +5,42 @@ from metagenedb.apps.catalog.factory import TaxonomyFactory
class
TestBuildHierarchy
(
APITestCase
):
def
setUp
(
self
):
@
classmethod
def
setUpTestData
(
cls
):
"""
Build some test data for different tests
"""
self
.
root
=
TaxonomyFactory
.
create
(
cls
.
root
=
TaxonomyFactory
.
create
(
tax_id
=
"1"
,
name
=
"root"
,
rank
=
"no_rank"
,
)
self
.
kingdom
=
TaxonomyFactory
(
cls
.
kingdom
=
TaxonomyFactory
(
tax_id
=
"2"
,
name
=
"KINGDOM"
,
rank
=
"kingdom"
,
parent
=
self
.
root
parent
=
cls
.
root
)
self
.
phylum
=
TaxonomyFactory
(
cls
.
phylum
=
TaxonomyFactory
(
tax_id
=
"3"
,
name
=
"PHYLUM"
,
rank
=
"phylum"
,
parent
=
self
.
kingdom
parent
=
cls
.
kingdom
)
def
test_build_hierarchy
(
self
):
expected_dict
=
{
'phylum'
:
self
.
phylum
,
'kingdom'
:
self
.
kingdom
'phylum'
:
{
'tax_id'
:
self
.
phylum
.
tax_id
,
'name'
:
self
.
phylum
.
name
},
'kingdom'
:
{
'tax_id'
:
self
.
kingdom
.
tax_id
,
'name'
:
self
.
kingdom
.
name
}
}
self
.
assertNotEqual
(
getattr
(
self
.
phylum
,
'kingdom'
,
None
),
self
.
kingdom
)
test_dict
=
self
.
phylum
.
parental_hierarchy
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
self
.
assertEqual
(
getattr
(
self
.
phylum
,
'kingdom'
,
None
),
self
.
kingdom
)
# Now try a second time from saved information
test_dict
=
self
.
phylum
.
parental_hierarchy
self
.
assertIsNone
(
getattr
(
self
.
phylum
,
'hierarchy'
))
test_dict
=
self
.
phylum
.
build_hierarchy
()
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
self
.
assertIsNotNone
(
getattr
(
self
.
phylum
,
'hierarchy'
))
self
.
assertDictEqual
(
getattr
(
self
.
phylum
,
'hierarchy'
),
expected_dict
)
backend/metagenedb/apps/catalog/operations/statistics.py
View file @
77ae83bb
...
...
@@ -41,42 +41,12 @@ class GeneStatistics(Statistics):
}
return
self
.
get_queryset
(
filters
=
filters
).
distinct
().
count
()
def gene_length(self, window_size=1000, stop_at=10000, filters=None):
    """
    Count how many gene by window of gene length.

    Genes are counted in consecutive windows of ``window_size`` starting at
    0; the last window also takes every gene above ``stop_at``. When
    ``filters`` is given, the count is done on the filtered, distinct
    queryset.

    Returns a dict with two parallel lists, 'counts' and 'labels'. Both are
    empty when the selected queryset has no gene length to report.
    """
    if filters is None:
        queryset = self.get_queryset().only('length')
    else:
        queryset = self.get_queryset(filters=filters).distinct().only('length')
    # On an empty queryset aggregate() returns {'length__max': None}: the key
    # is present, so a .get() default never applies and None must be handled
    # explicitly (it used to fail on the comparison with stop_at).
    length_max = queryset.aggregate(Max('length'))['length__max']
    if length_max is None:
        return {
            'counts': [],
            'labels': []
        }
    stop_at = min(length_max, stop_at)
    all_ranges = [[start, start + window_size] for start in range(0, stop_at + 1, window_size)]
    all_ranges[-1][1] = length_max + 1  # last should contain all above the stop_at
    data = []
    labels = []
    for lower, upper in all_ranges:
        labels.append(f"{lower/1000}k-{upper/1000}k")
        data.append(queryset.filter(length__gte=lower, length__lt=upper).count())
    # Change labels
    # NOTE(review): with a single window both rewrites hit the same label.
    labels[0] = f"<{labels[0].split('-')[1]}"
    labels[-1] = f">{labels[-1].split('-')[0]}"
    return {
        'counts': data,
        'labels': labels
    }
def
taxonomy_repartition
(
self
,
level
=
"phylum"
):
level
=
"class_rank"
if
level
==
"class"
else
level
queryset
=
self
.
get_queryset
().
select_related
(
f
'taxonomy__
{
level
}
'
)
filter_no_annotation
=
{
f
"taxonomy__
{
level
}
__isnull"
:
True
}
filter_annotation
=
{
f
"taxonomy__
{
level
}
__isnull"
:
False
}
value_to_retrieve
=
f
'taxonomy__
{
level
}
__name'
filter_no_annotation
=
{
f
"taxonomy__
hierarchy__
{
level
}
__isnull"
:
True
}
filter_annotation
=
{
f
"taxonomy__
hierarchy__
{
level
}
__isnull"
:
False
}
value_to_retrieve
=
f
'taxonomy__
hierarchy__
{
level
}
__name'
taxonomy_counts
=
defaultdict
(
lambda
:
0
)
taxonomy_counts
[
'No annotation'
]
=
queryset
.
filter
(
**
filter_no_annotation
).
values
().
count
()
if
taxonomy_counts
[
'No annotation'
]
==
0
:
...
...
backend/metagenedb/apps/catalog/operations/test_statistics.py
View file @
77ae83bb
...
...
@@ -15,6 +15,10 @@ class BaseTestGeneStatistics(APITestCase):
class
TestTaxonomyRepartition
(
BaseTestGeneStatistics
):
@
classmethod
def
setUpTestData
(
cls
):
cls
.
parent_root
=
TaxonomyFactory
(
rank
=
"root"
)
def
test_taxonomy_counts_no_content
(
self
):
expected_dict
=
{
'labels'
:
[],
...
...
@@ -26,8 +30,9 @@ class TestTaxonomyRepartition(BaseTestGeneStatistics):
def
test_taxonomy_repartition
(
self
):
tax_name
=
"TaxTest"
taxonomy
=
TaxonomyFactory
(
rank
=
'phylum'
,
name
=
tax_name
)
taxonomy
.
p
hylum
=
taxonomy
# link taxonomy to itself as phylum
taxonomy
.
p
arent
=
self
.
parent_root
taxonomy
.
save
()
taxonomy
.
build_hierarchy
()
gene
=
GeneFactory
.
create
(
taxonomy
=
taxonomy
)
# noqa
expected_dict
=
{
'labels'
:
[
tax_name
],
...
...
@@ -39,8 +44,9 @@ class TestTaxonomyRepartition(BaseTestGeneStatistics):
def
test_taxonomy_counts_class_level
(
self
):
tax_name
=
"TaxTest"
taxonomy
=
TaxonomyFactory
(
rank
=
'class_rank'
,
name
=
tax_name
)
taxonomy
.
class_rank
=
taxonomy
# link taxonomy to itself as phylum
taxonomy
.
parent
=
self
.
parent_root
taxonomy
.
save
()
taxonomy
.
build_hierarchy
()
gene
=
GeneFactory
.
create
(
taxonomy
=
taxonomy
)
# noqa
expected_dict
=
{
'labels'
:
[
tax_name
],
...
...
backend/metagenedb/apps/catalog/serializers/taxonomy.py
View file @
77ae83bb
...
...
@@ -27,59 +27,10 @@ class TaxonomySerializer(serializers.ModelSerializer):
source
=
'parent'
,
required
=
False
,
)
superkingdom
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
kingdom
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
phylum
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
class_rank
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
order
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
family
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
genus
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
species
=
AsymetricSlugRelatedField
.
from_serializer
(
SimpleTaxonomySerializer
,
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
class
Meta
:
model
=
Taxonomy
list_serializer_class
=
TaxonomyListSerializer
fields
=
(
'tax_id'
,
'name'
,
'rank'
,
'parent_tax_id'
,
'superkingdom'
,
'kingdom'
,
'phylum'
,
'class_rank'
,
'order'
,
'family'
,
'genus'
,
'species'
,
'tax_id'
,
'name'
,
'rank'
,
'parent_tax_id'
,
'hierarchy'
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment