Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
751e1571
Commit
751e1571
authored
Dec 30, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
index gene length and improve computation
parent
a9a945d5
Changes
14
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/admin/gene.py
View file @
751e1571
...
...
@@ -6,8 +6,8 @@ from metagenedb.apps.catalog.models import Gene
@
admin
.
register
(
Gene
)
class
GeneAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'gene_id'
,
'
gene_
name'
,
'length'
,
'get_functions'
,
'get_taxonomy'
)
search_fields
=
(
'
gene_
name'
,)
list_display
=
(
'gene_id'
,
'name'
,
'length'
,
'get_functions'
,
'get_taxonomy'
)
search_fields
=
(
'name'
,)
def
get_functions
(
self
,
obj
):
if
obj
.
functions
.
all
():
...
...
backend/metagenedb/apps/catalog/factory/gene.py
View file @
751e1571
...
...
@@ -19,7 +19,7 @@ class GeneFactory(DjangoModelFactory):
model
=
models
.
Gene
gene_id
=
FuzzyLowerText
(
prefix
=
'gene-'
,
length
=
15
)
gene_
name
=
fuzzy
.
FuzzyText
(
prefix
=
'name-'
,
length
=
15
)
name
=
fuzzy
.
FuzzyText
(
prefix
=
'name-'
,
length
=
15
)
length
=
fuzzy
.
FuzzyInteger
(
200
,
10000
)
taxonomy
=
SubFactory
(
TaxonomyFactory
)
...
...
backend/metagenedb/apps/catalog/management/commands/compute_stats.py
View file @
751e1571
...
...
@@ -7,6 +7,8 @@ from slugify import slugify
from
metagenedb.apps.catalog.models
import
Statistics
from
metagenedb.apps.catalog.operations.statistics
import
GeneStatistics
from
metagenedb.common.utils.profiling
import
profile
logging
.
basicConfig
(
format
=
'[%(asctime)s] %(levelname)s:%(name)s:%(message)s'
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -67,6 +69,7 @@ class ComputeGeneLength(ComputeStatistics):
},
}
@
profile
(
'/Users/khillion/Sandbox/compute_genes.prof'
)
def
all
(
self
):
gene_stats
=
GeneStatistics
()
for
category
,
filters
in
self
.
CATEGORIES
.
items
():
...
...
backend/metagenedb/apps/catalog/management/commands/import_igc_annotation.py
View file @
751e1571
...
...
@@ -94,7 +94,7 @@ class ImportIGCGenes(object):
def
_format_for_model
(
self
,
igc_dict
):
gene_dict
=
{}
gene_dict
[
'
gene_
name'
]
=
igc_dict
[
'gene_id'
]
gene_dict
[
'name'
]
=
igc_dict
[
'gene_id'
]
gene_dict
[
'gene_id'
]
=
slugify
(
igc_dict
[
'gene_id'
])
gene_dict
[
'length'
]
=
igc_dict
[
'length'
]
if
not
self
.
skip_tax
:
...
...
@@ -108,7 +108,7 @@ class ImportIGCGenes(object):
try
:
Gene
.
objects
.
bulk_update
(
list
(
gene_instances
.
values
()),
[
'
gene_
name'
,
'taxonomy'
,
'length'
]
[
'name'
,
'taxonomy'
,
'length'
]
)
self
.
updated_genes
+=
len
(
gene_instances
.
keys
())
except
Exception
as
exception
:
...
...
backend/metagenedb/apps/catalog/management/commands/test_import_igc_annotation.py
View file @
751e1571
...
...
@@ -26,7 +26,7 @@ class TestParseGene(BaseTestImportIGCGenes):
def
setUp
(
self
):
raw_data
=
[
'gene_id'
,
'
gene_
name'
,
'name'
,
'length'
,
'gene_completeness_status'
,
'cohort_origin'
,
...
...
@@ -48,7 +48,7 @@ class TestParseGene(BaseTestImportIGCGenes):
This test should failed and need to be updated when SELECTED_KEYS are changed
"""
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
,
'kegg_ko'
:
[
'kegg'
],
'eggnog'
:
[
'eggnog'
],
...
...
@@ -64,7 +64,7 @@ class TestParseGene(BaseTestImportIGCGenes):
"""
selected_keys
=
[
'gene_id'
,
'length'
]
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
}
tested_dict
=
self
.
import_igc_genes
.
_parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
...
...
@@ -76,7 +76,7 @@ class TestParseGene(BaseTestImportIGCGenes):
"""
selected_keys
=
[
'gene_id'
,
'length'
,
'secret_code'
]
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
}
tested_dict
=
self
.
import_igc_genes
.
_parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
...
...
@@ -147,13 +147,13 @@ class TestCreateOrUpdateGenes(APITestCase, BaseTestImportIGCGenes):
def
test_create_1_update_1
(
self
):
gene_to_update
=
{
'gene_id'
:
self
.
gene
.
gene_id
,
'
gene_
name'
:
'Updated Gene'
,
'name'
:
'Updated Gene'
,
'length'
:
2235
,
'taxonomy'
:
self
.
taxo_list
[
0
]
}
gene_to_create
=
{
'gene_id'
:
'gene-create-123'
,
'
gene_
name'
:
'Created Gene'
,
'name'
:
'Created Gene'
,
'length'
:
5629
,
'taxonomy'
:
self
.
taxo_list
[
1
]
}
...
...
backend/metagenedb/apps/catalog/migrations/0021_index_gene_length.py
0 → 100644
View file @
751e1571
# Generated by Django 3.0.1 on 2019-12-30 10:31
from
django.db.models.indexes
import
Index
from
django.db
import
migrations
,
models
class
Migration
(
migrations
.
Migration
):
dependencies
=
[
(
'catalog'
,
'0020_statistics'
),
]
operations
=
[
migrations
.
RenameField
(
model_name
=
'gene'
,
old_name
=
'gene_name'
,
new_name
=
'name'
,
),
migrations
.
AlterField
(
model_name
=
'gene'
,
name
=
'length'
,
field
=
models
.
PositiveIntegerField
(
db_index
=
True
),
),
migrations
.
AddIndex
(
'Gene'
,
Index
(
fields
=
[
'length'
],
name
=
'length_index'
)),
]
backend/metagenedb/apps/catalog/models/gene.py
View file @
751e1571
...
...
@@ -4,9 +4,9 @@ from .function import Function
class
Gene
(
models
.
Model
):
gene_
name
=
models
.
CharField
(
max_length
=
100
,
unique
=
True
)
name
=
models
.
CharField
(
max_length
=
100
,
unique
=
True
)
gene_id
=
models
.
SlugField
(
max_length
=
100
,
db_index
=
True
,
unique
=
True
)
length
=
models
.
PositiveIntegerField
()
length
=
models
.
PositiveIntegerField
(
db_index
=
True
)
functions
=
models
.
ManyToManyField
(
Function
,
through
=
'GeneFunction'
)
taxonomy
=
models
.
ForeignKey
(
'Taxonomy'
,
related_name
=
'genes'
,
...
...
backend/metagenedb/apps/catalog/operations/statistics.py
View file @
751e1571
...
...
@@ -49,11 +49,6 @@ class GeneStatistics(Statistics):
queryset
=
self
.
get_queryset
().
only
(
'length'
)
else
:
queryset
=
self
.
get_queryset
(
filters
=
filters
).
distinct
().
only
(
'length'
)
if
not
queryset
:
return
{
'counts'
:
[],
'labels'
:
[]
}
length_max
=
queryset
.
aggregate
(
Max
(
'length'
)).
get
(
'length__max'
,
0
)
stop_at
=
length_max
if
length_max
<
stop_at
else
stop_at
all_ranges
=
[[
i
,
i
+
window_size
]
for
i
in
range
(
0
,
stop_at
+
1
,
window_size
)]
...
...
backend/metagenedb/apps/catalog/serializers/gene.py
View file @
751e1571
...
...
@@ -69,7 +69,7 @@ class GeneSerializer(serializers.ModelSerializer):
class
Meta
:
model
=
Gene
list_serializer_class
=
GeneListSerializer
fields
=
(
'gene_id'
,
'
gene_
name'
,
'length'
,
'functions'
,
'taxonomy'
)
fields
=
(
'gene_id'
,
'name'
,
'length'
,
'functions'
,
'taxonomy'
)
def
_extract_many_to_many
(
self
,
validated_data
,
info
):
many_to_many
=
{}
...
...
backend/metagenedb/common/utils/parsers/test_igc.py
View file @
751e1571
...
...
@@ -8,7 +8,7 @@ class TestIGCLineParser(TestCase):
def
test_gene
(
self
):
raw_data
=
[
'gene_id'
,
'
gene_
name'
,
'name'
,
'length'
,
'gene_completeness_status'
,
'cohort_origin'
,
...
...
@@ -50,7 +50,7 @@ class TestIGCLineParser(TestCase):
def
test_multiple_functions
(
self
):
raw_data
=
[
'gene_id'
,
'
gene_
name'
,
'name'
,
'length'
,
'gene_completeness_status'
,
'cohort_origin'
,
...
...
@@ -80,7 +80,7 @@ class TestIGCLineParser(TestCase):
def
test_multiple_same_functions
(
self
):
raw_data
=
[
'gene_id'
,
'
gene_
name'
,
'name'
,
'length'
,
'gene_completeness_status'
,
'cohort_origin'
,
...
...
backend/scripts/populate_db/import_igc_data.py
View file @
751e1571
...
...
@@ -150,7 +150,7 @@ class ImportIGCGenes(object):
return
clean_functions
def
_clean_gene
(
self
,
gene_dict
):
gene_dict
[
'
gene_
name'
]
=
gene_dict
[
'gene_id'
]
gene_dict
[
'name'
]
=
gene_dict
[
'gene_id'
]
gene_dict
[
'gene_id'
]
=
slugify
(
gene_dict
[
'gene_id'
])
gene_dict
[
'functions'
]
=
[
{
'source'
:
'kegg'
,
'function_id'
:
v
}
for
v
in
gene_dict
.
pop
(
'kegg_ko'
)
if
v
!=
'unknown'
]
+
\
...
...
backend/scripts/populate_db/test_import_igc_data.py
View file @
751e1571
...
...
@@ -20,7 +20,7 @@ class TestParseGene(TestCase):
def
setUp
(
self
):
raw_data
=
[
'gene_id'
,
'
gene_
name'
,
'name'
,
'length'
,
'gene_completeness_status'
,
'cohort_origin'
,
...
...
@@ -42,7 +42,7 @@ class TestParseGene(TestCase):
This test should failed and need to be updated when SELECTED_KEYS are changed
"""
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
,
'kegg_ko'
:
[
'kegg'
],
'eggnog'
:
[
'eggnog'
],
...
...
@@ -58,7 +58,7 @@ class TestParseGene(TestCase):
"""
selected_keys
=
[
'gene_id'
,
'length'
]
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
}
tested_dict
=
self
.
import_igc_genes
.
_parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
...
...
@@ -70,7 +70,7 @@ class TestParseGene(TestCase):
"""
selected_keys
=
[
'gene_id'
,
'length'
,
'secret_code'
]
expected_dict
=
{
'gene_id'
:
'
gene_
name'
,
'gene_id'
:
'name'
,
'length'
:
'length'
}
tested_dict
=
self
.
import_igc_genes
.
_parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
...
...
@@ -93,7 +93,7 @@ class TestCleanGene(TestCase):
def
test_clean_gene
(
self
):
expected_gene_dict
=
{
'gene_id'
:
'gene-01'
,
'
gene_
name'
:
'gene.01'
,
'name'
:
'gene.01'
,
'length'
:
135
,
'functions'
:
[
{
'source'
:
'kegg'
,
'function_id'
:
'K00001'
},
...
...
@@ -107,7 +107,7 @@ class TestCleanGene(TestCase):
self
.
import_igc_genes
.
skip_functions
=
True
expected_gene_dict
=
{
'gene_id'
:
'gene-01'
,
'
gene_
name'
:
'gene.01'
,
'name'
:
'gene.01'
,
'length'
:
135
,
}
test_gene_dict
=
self
.
import_igc_genes
.
_clean_gene
(
self
.
gene_dict
)
...
...
@@ -122,7 +122,7 @@ class TestCleanGene(TestCase):
}
expected_gene_dict
=
{
'gene_id'
:
'gene-01'
,
'
gene_
name'
:
'gene.01'
,
'name'
:
'gene.01'
,
'functions'
:
[{
'function_id'
:
'COG1'
,
'source'
:
'eggnog'
}],
'length'
:
135
}
...
...
@@ -138,7 +138,7 @@ class TestCleanGene(TestCase):
}
expected_gene_dict
=
{
'gene_id'
:
'gene-01'
,
'
gene_
name'
:
'gene.01'
,
'name'
:
'gene.01'
,
'length'
:
135
}
test_gene_dict
=
self
.
import_igc_genes
.
_clean_gene
(
gene_dict
)
...
...
frontend/src/views/GeneDetail.vue
View file @
751e1571
...
...
@@ -86,7 +86,7 @@ export default {
},
{
title
:
'
Name
'
,
content
:
response
.
data
.
gene_
name
,
content
:
response
.
data
.
name
,
},
{
title
:
'
Length
'
,
...
...
frontend/src/views/Genes.vue
View file @
751e1571
...
...
@@ -20,7 +20,7 @@
>
<template
v-slot:items=
"props"
>
<td><a
:href=
"/gene-detail/ + props.item.gene_id"
>
{{
props
.
item
.
gene_id
}}
</a></td>
<td>
{{
props
.
item
.
gene_
name
}}
</td>
<td>
{{
props
.
item
.
name
}}
</td>
<td
class=
"text-xs"
>
{{
props
.
item
.
length
}}
</td>
<td
class=
"text-xs"
></td>
<td
class=
"text-xs"
>
{{
props
.
item
.
functions
}}
</td>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment