Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
f00c7a44
Commit
f00c7a44
authored
Jul 17, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Add Taxonomy model and script to import from local files
parent
08993b8c
Pipeline
#13218
failed with stage
in 1 minute and 15 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/admin/__init__.py
View file @
f00c7a44
from
.gene
import
GeneAdmin
from
.function
import
FunctionAdmin
,
KeggOrthologyAdmin
from
.taxonomy
import
TaxonomyAdmin
__all__
=
[
'GeneAdmin'
,
'FunctionAdmin'
,
'KeggOrthologyAdmin'
]
__all__
=
[
'GeneAdmin'
,
'FunctionAdmin'
,
'KeggOrthologyAdmin'
,
'TaxonomyAdmin'
]
backend/metagenedb/apps/catalog/admin/taxonomy.py
0 → 100644
View file @
f00c7a44
from
django.contrib
import
admin
from
metagenedb.apps.catalog.models
import
Taxonomy
@admin.register(Taxonomy)
class TaxonomyAdmin(admin.ModelAdmin):
    """Admin configuration for the Taxonomy model."""

    # Columns displayed in the admin change list.
    list_display = (
        'tax_id',
        'name',
        'rank',
        'parent',
    )
    # Fields queried by the admin search box.
    search_fields = (
        'tax_id',
        'name',
    )
backend/metagenedb/apps/catalog/migrations/0002_taxonomy.py
0 → 100644
View file @
f00c7a44
# Generated by Django 2.2.1 on 2019-07-17 12:20
from
django.db
import
migrations
,
models
import
django.db.models.deletion
class Migration(migrations.Migration):
    # Auto-generated migration that creates the Taxonomy model.
    # Do not hand-edit the operations: they must match the recorded schema state.

    # Must be applied after the initial migration of the catalog app.
    dependencies = [
        ('catalog', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Taxonomy',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('tax_id', models.CharField(db_index=True, max_length=20, unique=True)),
                ('name', models.CharField(default='No scientific name', max_length=200)),
                ('rank', models.CharField(choices=[('infraclass', 'Infraclass'), ('class', 'Class'), ('forma', 'Forma'), ('phylum', 'Phylum'), ('species_subgroup', 'Species subgroup'), ('genus', 'Genus'), ('parvorder', 'Parvorder'), ('subcohort', 'Subcohort'), ('subtribe', 'Subtribe'), ('superphylum', 'Superphylum'), ('subgenus', 'Subgenus'), ('superorder', 'Superorder'), ('species', 'Species'), ('subphylum', 'Subphylum'), ('infraorder', 'Infraorder'), ('section', 'Section'), ('tribe', 'Tribe'), ('cohort', 'Cohort'), ('subsection', 'Subsection'), ('series', 'Series'), ('order', 'Order'), ('subclass', 'Subclass'), ('superfamily', 'Superfamily'), ('superclass', 'Superclass'), ('superkingdom', 'Superkingdom'), ('kingdom', 'Kingdom'), ('family', 'Family'), ('suborder', 'Suborder'), ('subkingdom', 'Subkingdom'), ('subspecies', 'Subspecies'), ('no_rank', 'No rank'), ('subfamily', 'Subfamily'), ('varietas', 'Varietas'), ('species_group', 'Species group')], max_length=20)),
                # Self-referential link to the parent taxon; set to NULL when the parent row is deleted.
                ('parent', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='children', to='catalog.Taxonomy')),
            ],
            options={
                'verbose_name_plural': 'Taxonomy',
            },
        ),
    ]
backend/metagenedb/apps/catalog/models/__init__.py
View file @
f00c7a44
from
.function
import
Function
,
KeggOrthology
from
.gene
import
Gene
from
.taxonomy
import
Taxonomy
__all__
=
[
'Function'
,
'KeggOrthology'
,
'Gene'
]
__all__
=
[
'Function'
,
'KeggOrthology'
,
'Gene'
,
'Taxonomy'
]
backend/metagenedb/apps/catalog/models/taxonomy.py
0 → 100644
View file @
f00c7a44
from
django.db
import
models
class Taxonomy(models.Model):
    """
    Taxonomy is based on NCBI taxonomy: https://www.ncbi.nlm.nih.gov/taxonomy

    Each row is one taxon, identified by a unique ``tax_id`` and optionally
    linked to its parent taxon through ``parent`` (reverse accessor:
    ``children``).
    """
    # Name stored when no name is supplied for a taxon.
    NAME_DEFAULT = "No scientific name"
    # (stored value, human-readable label) pairs accepted by the ``rank`` field.
    # The order is mirrored in migration 0002_taxonomy; reordering would
    # produce a new migration.
    RANK_CHOICES = [
        ('infraclass', 'Infraclass'),
        ('class', 'Class'),
        ('forma', 'Forma'),
        ('phylum', 'Phylum'),
        ('species_subgroup', 'Species subgroup'),
        ('genus', 'Genus'),
        ('parvorder', 'Parvorder'),
        ('subcohort', 'Subcohort'),
        ('subtribe', 'Subtribe'),
        ('superphylum', 'Superphylum'),
        ('subgenus', 'Subgenus'),
        ('superorder', 'Superorder'),
        ('species', 'Species'),
        ('subphylum', 'Subphylum'),
        ('infraorder', 'Infraorder'),
        ('section', 'Section'),
        ('tribe', 'Tribe'),
        ('cohort', 'Cohort'),
        ('subsection', 'Subsection'),
        ('series', 'Series'),
        ('order', 'Order'),
        ('subclass', 'Subclass'),
        ('superfamily', 'Superfamily'),
        ('superclass', 'Superclass'),
        ('superkingdom', 'Superkingdom'),
        ('kingdom', 'Kingdom'),
        ('family', 'Family'),
        ('suborder', 'Suborder'),
        ('subkingdom', 'Subkingdom'),
        ('subspecies', 'Subspecies'),
        ('no_rank', 'No rank'),
        ('subfamily', 'Subfamily'),
        ('varietas', 'Varietas'),
        ('species_group', 'Species group'),
    ]

    # NOTE(review): unique=True already implies an index in Django, so
    # db_index=True looks redundant here; removing it would need a migration.
    tax_id = models.CharField(max_length=20, unique=True, db_index=True)
    name = models.CharField(max_length=200, default=NAME_DEFAULT)
    rank = models.CharField(max_length=20, choices=RANK_CHOICES)
    # Self-referential link to the parent taxon; nullable, and reset to NULL
    # (rather than cascading) when the parent row is deleted.
    parent = models.ForeignKey(
        'Taxonomy', related_name='children',
        on_delete=models.SET_NULL,
        null=True, blank=True,
    )

    def __str__(self):
        # Display a taxon by its name.
        return f"{self.name}"

    class Meta:
        # "Taxonomy" is used as-is for the plural label instead of an auto-generated plural.
        verbose_name_plural = "Taxonomy"
backend/metagenedb/apps/catalog/serializers/__init__.py
0 → 100644
View file @
f00c7a44
from
.function
import
FunctionSerializer
from
.gene
import
GeneSerializer
from
.taxonomy
import
TaxonomySerializer
__all__
=
[
'FunctionSerializer'
,
'GeneSerializer'
,
'TaxonomySerializer'
]
\ No newline at end of file
backend/metagenedb/apps/catalog/serializers/function.py
0 → 100644
View file @
f00c7a44
from
rest_framework
import
serializers
from
metagenedb.apps.catalog.models
import
Function
class FunctionSerializer(serializers.ModelSerializer):
    """Serialize a Function through its identifier, source and name."""

    class Meta:
        model = Function
        fields = (
            'function_id',
            'source',
            'name',
        )
\ No newline at end of file
backend/metagenedb/apps/catalog/serializers/gene.py
0 → 100644
View file @
f00c7a44
from
rest_framework
import
serializers
from
metagenedb.apps.catalog.models
import
Gene
from
metagenedb.apps.catalog.serializers
import
FunctionSerializer
class GeneSerializer(serializers.ModelSerializer):
    """Serialize a Gene together with a read-only list of its functions."""

    # Nested representation of the related functions; never written through this serializer.
    functions = FunctionSerializer(
        many=True,
        read_only=True,
    )

    class Meta:
        model = Gene
        fields = (
            'gene_id',
            'gene_length',
            'functions',
        )
\ No newline at end of file
backend/metagenedb/apps/catalog/serializers/taxonomy.py
0 → 100644
View file @
f00c7a44
from
rest_framework
import
serializers
from
metagenedb.apps.catalog.models
import
Taxonomy
class TaxonomySerializer(serializers.ModelSerializer):
    """
    Serialize a Taxonomy entry, exposing its parent as ``parent_tax_id``.

    The parent is read and written through the parent's ``tax_id`` rather
    than its database primary key. The field is optional and accepts an
    explicit null, matching the nullable ``parent`` foreign key on the model.
    """

    parent_tax_id = serializers.SlugRelatedField(
        queryset=Taxonomy.objects.all(),
        slug_field='tax_id',
        source='parent',
        required=False,
        # The model's parent FK is null=True; without allow_null an explicit
        # null parent would be rejected at validation time.
        allow_null=True,
    )

    class Meta:
        model = Taxonomy
        fields = (
            'tax_id',
            'name',
            'rank',
            'parent_tax_id',
        )
backend/metagenedb/apps/catalog/views/insertion_model.py
View file @
f00c7a44
...
...
@@ -9,6 +9,7 @@ class InsertionBase(ABC):
"""
MANY_TO_MANY_FIELDS
=
[]
FOREIGN_KEY_FIELDS
=
[]
SIMPLE_FIELDS
=
[]
# Fields you want to be able to create with the class
@
property
def
model
(
self
):
...
...
@@ -22,7 +23,10 @@ class InsertionBase(ABC):
self
.
full_dict
=
model_dict
.
copy
()
self
.
foreign_key_dict
=
extract_dict
(
model_dict
,
self
.
FOREIGN_KEY_FIELDS
)
self
.
many_to_many_dict
=
extract_dict
(
model_dict
,
self
.
MANY_TO_MANY_FIELDS
)
self
.
simple_dict
=
model_dict
.
copy
()
if
self
.
SIMPLE_FIELDS
:
self
.
simple_dict
=
extract_dict
(
model_dict
,
self
.
SIMPLE_FIELDS
)
else
:
self
.
simple_dict
=
model_dict
.
copy
()
self
.
obj
=
None
def
upsert_to_db
(
self
):
...
...
backend/scripts/populate_db/import_ncbi_taxonomy.py
0 → 100755
View file @
f00c7a44
#!/usr/bin/env python
import
argparse
import
logging
import
os
import
sys
import
django
from
metagenedb.utils.parsers
import
NCBITaxonomyLineParser
# Before importing the models, we need to call django.setup() to load the apps.
# setdefault keeps any DJANGO_SETTINGS_MODULE already present in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metagenedb.settings")
django.setup()
from
metagenedb.apps.catalog.models
import
Taxonomy
# noqa
from
metagenedb.apps.catalog.serializers
import
TaxonomySerializer
# noqa
# Configure root logging at INFO level and create the module-level logger
# used by the functions of this script.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
def import_names(taxonomy_names_file, select_class="scientific name"):
    """
    Build and return a dict {tax_id: tax_name} for the chosen select_class.

    Only the lines of taxonomy_names_file that contain the select_class text
    are parsed; for each of them the parsed 'tax_id' is mapped to the parsed
    'name_txt'. A later line with the same tax_id overwrites an earlier one.
    """
    _LOGGER.info(f"Importing {select_class} from {taxonomy_names_file}...")
    names_by_tax_id = {}
    with open(taxonomy_names_file, "r") as names_handle:
        # Plain substring filter on the raw line, applied before parsing.
        selected_rows = (row for row in names_handle if select_class in row)
        for row in selected_rows:
            parsed = NCBITaxonomyLineParser.name(row)
            names_by_tax_id[parsed.get('tax_id')] = parsed.get('name_txt')
    return names_by_tax_id
def create_taxo_nodes(taxonomy_nodes_file, taxo_name_dict):
    """
    Create one Taxonomy entry per line of taxonomy_nodes_file.

    Each line is parsed into a node dict, given a name looked up in
    taxo_name_dict by its tax_id, and saved through TaxonomySerializer.
    Parent links are deliberately left out at this stage, since a parent may
    not have been created yet; they are meant to be set in a later pass.
    Nodes that fail validation are logged and skipped.

    :param taxonomy_nodes_file: path to the nodes file to read
    :param taxo_name_dict: dict {tax_id: name} used to name each node
    """
    _LOGGER.info(f"Create taxonomy objects from {taxonomy_nodes_file}...")
    foreign_key_fields = ['parent_tax_id']
    with open(taxonomy_nodes_file, "r") as file:
        for line in file:
            node = NCBITaxonomyLineParser.node(line)
            # Fall back to the model's own default so unnamed taxa carry the
            # same placeholder whichever code path created them.
            node['name'] = taxo_name_dict.get(node.get('tax_id'), Taxonomy.NAME_DEFAULT)
            for key in foreign_key_fields:
                # pop() with a default tolerates a parsed node lacking the key.
                node.pop(key, None)
            serializer = TaxonomySerializer(data=node)
            if serializer.is_valid():
                serializer.save()
            else:
                _LOGGER.warning(f"Invalid data: {serializer.errors}. Insertion skipped. Data: {serializer.data}")
def update_taxo_nodes(taxonomy_nodes_file):
    """
    Link existing Taxonomy entries to their parent nodes.

    Each line of taxonomy_nodes_file is parsed into a node dict; the matching
    Taxonomy entry is fetched by tax_id and updated through
    TaxonomySerializer with the full node data (including the parent
    reference). Nodes that are missing from the database, or whose data fail
    validation, are logged and skipped so that one bad line does not abort
    the whole run.

    :param taxonomy_nodes_file: path to the nodes file to read
    """
    _LOGGER.info(f"Linking taxonomy objects to parental nodes from {taxonomy_nodes_file}...")
    with open(taxonomy_nodes_file, "r") as file:
        for line in file:
            node = NCBITaxonomyLineParser.node(line)
            tax_id = node.get('tax_id')
            try:
                taxo_obj = Taxonomy.objects.get(tax_id=tax_id)
            except Taxonomy.DoesNotExist:
                # The entry may be absent when its creation was skipped earlier
                # because of invalid data.
                _LOGGER.warning(f"No taxonomy entry found for tax_id {tax_id}. Update skipped.")
                continue
            serializer = TaxonomySerializer(taxo_obj, data=node)
            if serializer.is_valid():
                serializer.save()
            else:
                _LOGGER.warning(f"Invalid data: {serializer.errors}. Insertion skipped. Data: {serializer.data}")
def parse_arguments(argv=None):
    """
    Define the command-line parser and parse the arguments.

    :param argv: list of argument strings to parse; defaults to sys.argv[1:]
    :return: namespace with the ``nodes`` and ``names`` attributes
    :raises SystemExit: with code 0 after printing help, with code 1 when the
        arguments are invalid
    """
    parser = argparse.ArgumentParser(description='Populate database from a given NCBI taxonomy files.')
    # Both NCBI taxonomy dump files are mandatory.
    parser.add_argument('--nodes', help='nodes.dmp file from ncbi_taxonomy', required=True)
    parser.add_argument('--names', help='names.dmp file from ncbi_taxonomy', required=True)
    try:
        return parser.parse_args(argv)
    except SystemExit as exit_request:
        # A successful exit (e.g. --help) must keep its status 0; only real
        # parsing failures are normalised to exit code 1.
        if exit_request.code in (0, None):
            raise
        sys.exit(1)
def run():
    """
    Script entry point.

    Parse the command line, load the names from the names file, create the
    taxonomy entries from the nodes file, then run the parent-linking pass
    over the same nodes file.
    """
    cli_args = parse_arguments()
    names_by_tax_id = import_names(cli_args.names)
    nodes_path = cli_args.nodes
    create_taxo_nodes(nodes_path, names_by_tax_id)
    update_taxo_nodes(nodes_path)
# Run the import only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    run()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment