Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
5ae2909a
Commit
5ae2909a
authored
Jul 19, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Merge branch '23-taxonomy-model' into 'master'
add superkingdom and improve performances Closes #23 See merge request
!4
parents
365da9d0
bee34ed2
Pipeline
#13265
passed with stage
in 1 minute and 19 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/admin/taxonomy.py
View file @
5ae2909a
...
...
@@ -7,7 +7,7 @@ from metagenedb.apps.catalog.models import Taxonomy
class
TaxonomyAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'tax_id'
,
'name'
,
'rank'
,
'tax_id'
,
'name'
,
'rank'
,
'superkingdom'
,
'kingdom'
,
'phylum'
,
'class_rank'
,
'order'
,
'family'
,
'genus'
,
'species'
,
)
search_fields
=
(
'tax_id'
,
'name'
)
backend/metagenedb/apps/catalog/migrations/0004_taxonomy_superkingdom.py
0 → 100644
View file @
5ae2909a
# Generated by Django 2.2.1 on 2019-07-18 09:34
from
django.db
import
migrations
,
models
import
django.db.models.deletion
class
Migration
(
migrations
.
Migration
):
dependencies
=
[
(
'catalog'
,
'0003_auto_20190717_1551'
),
]
operations
=
[
migrations
.
AddField
(
model_name
=
'taxonomy'
,
name
=
'superkingdom'
,
field
=
models
.
ForeignKey
(
blank
=
True
,
null
=
True
,
on_delete
=
django
.
db
.
models
.
deletion
.
SET_NULL
,
related_name
=
'superkingdom_children'
,
to
=
'catalog.Taxonomy'
),
),
]
backend/metagenedb/apps/catalog/models/taxonomy.py
View file @
5ae2909a
...
...
@@ -52,6 +52,11 @@ class Taxonomy(models.Model):
null
=
True
,
blank
=
True
,
)
superkingdom
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'superkingdom_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
kingdom
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'kingdom_children'
,
on_delete
=
models
.
SET_NULL
,
...
...
@@ -74,7 +79,7 @@ class Taxonomy(models.Model):
null
=
True
,
blank
=
True
,
)
family
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'family
phy
_children'
,
'Taxonomy'
,
related_name
=
'family_children'
,
on_delete
=
models
.
SET_NULL
,
null
=
True
,
blank
=
True
,
)
...
...
backend/metagenedb/apps/catalog/serializers/taxonomy.py
View file @
5ae2909a
...
...
@@ -10,6 +10,11 @@ class TaxonomySerializer(serializers.ModelSerializer):
source
=
'parent'
,
required
=
False
,
)
superkingdom
=
serializers
.
SlugRelatedField
(
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
required
=
False
)
kingdom
=
serializers
.
SlugRelatedField
(
queryset
=
Taxonomy
.
objects
.
all
(),
slug_field
=
'tax_id'
,
...
...
@@ -49,6 +54,6 @@ class TaxonomySerializer(serializers.ModelSerializer):
class
Meta
:
model
=
Taxonomy
fields
=
(
'tax_id'
,
'name'
,
'rank'
,
'parent_tax_id'
,
'tax_id'
,
'name'
,
'rank'
,
'parent_tax_id'
,
'superkingdom'
,
'kingdom'
,
'phylum'
,
'class_rank'
,
'order'
,
'family'
,
'genus'
,
'species'
,
)
backend/scripts/populate_db/import_ncbi_taxonomy.py
View file @
5ae2909a
...
...
@@ -18,6 +18,8 @@ from metagenedb.apps.catalog.serializers import TaxonomySerializer # noqa
logging
.
basicConfig
(
level
=
logging
.
INFO
)
_LOGGER
=
logging
.
getLogger
(
__name__
)
SELECT_RELATED_PARENT
=
"parent{}"
.
format
(
"__parent"
*
40
)
def
import_names
(
taxonomy_names_file
,
select_class
=
"scientific name"
):
"""
...
...
@@ -62,21 +64,30 @@ def update_taxo_nodes(taxonomy_nodes_file):
_LOGGER
.
warning
(
f
"Invalid data:
{
serializer
.
errors
}
. Link to parent skipped. Data:
{
serializer
.
data
}
"
)
def
build_hierarchy
():
def
_build_hierarchy
(
taxo
):
hierarchy
=
taxo
.
build_parental_hierarchy
()
if
'class'
in
hierarchy
.
keys
():
hierarchy
[
'class_rank'
]
=
hierarchy
.
pop
(
'class'
)
serializer
=
TaxonomySerializer
(
taxo
,
hierarchy
)
if
serializer
.
is_valid
():
serializer
.
save
()
else
:
_LOGGER
.
warning
(
f
"Invalid data:
{
serializer
.
errors
}
. Building hierarchy skipped. Data:
{
serializer
.
data
}
"
)
def
build_all_hierarchy
(
chunk_size
=
8000
):
"""
Uses class method from Taxonomy model to retrieve the parental hierarchy and
assign corresponding attribute to each entry.
"""
_LOGGER
.
info
(
f
"Linking taxonomy objects to parental nodes from direct parental nodes..."
)
for
taxo
in
Taxonomy
.
objects
.
all
():
hierarchy
=
taxo
.
build_parental_hierarchy
()
if
'class'
in
hierarchy
.
keys
():
hierarchy
[
'class_rank'
]
=
hierarchy
.
pop
(
'class'
)
serializer
=
TaxonomySerializer
(
taxo
,
hierarchy
)
if
serializer
.
is_valid
():
serializer
.
save
()
else
:
_LOGGER
.
warning
(
f
"Invalid data:
{
serializer
.
errors
}
. Building hierarchy skipped. Data:
{
serializer
.
data
}
"
)
all_taxo
=
Taxonomy
.
objects
.
select_related
(
SELECT_RELATED_PARENT
).
all
()
cpt
=
0
for
taxo
in
all_taxo
.
iterator
(
chunk_size
=
chunk_size
):
_build_hierarchy
(
taxo
)
cpt
+=
1
if
cpt
%
10000
==
0
:
_LOGGER
.
info
(
f
"
{
cpt
}
/
{
all_taxo
.
count
()
}
hierachies built..."
)
def
parse_arguments
():
...
...
@@ -99,7 +110,7 @@ def run():
taxonomy_names
=
import_names
(
args
.
names
)
create_taxo_nodes
(
args
.
nodes
,
taxonomy_names
)
update_taxo_nodes
(
args
.
nodes
)
build_hierarchy
()
build_
all_
hierarchy
()
if
__name__
==
"__main__"
:
...
...
docker-compose.yaml
View file @
5ae2909a
...
...
@@ -20,6 +20,7 @@ services:
-
main
db
:
shm_size
:
'
2gb'
container_name
:
db
image
:
postgres:latest
ports
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment