Commit 751e1571 authored by Kenzo-Hugo Hillion
Browse files

index gene length and improve computation

parent a9a945d5
......@@ -6,8 +6,8 @@ from metagenedb.apps.catalog.models import Gene
@admin.register(Gene)
class GeneAdmin(admin.ModelAdmin):
list_display = ('gene_id', 'gene_name', 'length', 'get_functions', 'get_taxonomy')
search_fields = ('gene_name',)
list_display = ('gene_id', 'name', 'length', 'get_functions', 'get_taxonomy')
search_fields = ('name',)
def get_functions(self, obj):
if obj.functions.all():
......
......@@ -19,7 +19,7 @@ class GeneFactory(DjangoModelFactory):
model = models.Gene
gene_id = FuzzyLowerText(prefix='gene-', length=15)
gene_name = fuzzy.FuzzyText(prefix='name-', length=15)
name = fuzzy.FuzzyText(prefix='name-', length=15)
length = fuzzy.FuzzyInteger(200, 10000)
taxonomy = SubFactory(TaxonomyFactory)
......
......@@ -7,6 +7,8 @@ from slugify import slugify
from metagenedb.apps.catalog.models import Statistics
from metagenedb.apps.catalog.operations.statistics import GeneStatistics
from metagenedb.common.utils.profiling import profile
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)
......@@ -67,6 +69,7 @@ class ComputeGeneLength(ComputeStatistics):
},
}
@profile('/Users/khillion/Sandbox/compute_genes.prof')
def all(self):
gene_stats = GeneStatistics()
for category, filters in self.CATEGORIES.items():
......
......@@ -94,7 +94,7 @@ class ImportIGCGenes(object):
def _format_for_model(self, igc_dict):
gene_dict = {}
gene_dict['gene_name'] = igc_dict['gene_id']
gene_dict['name'] = igc_dict['gene_id']
gene_dict['gene_id'] = slugify(igc_dict['gene_id'])
gene_dict['length'] = igc_dict['length']
if not self.skip_tax:
......@@ -108,7 +108,7 @@ class ImportIGCGenes(object):
try:
Gene.objects.bulk_update(
list(gene_instances.values()),
['gene_name', 'taxonomy', 'length']
['name', 'taxonomy', 'length']
)
self.updated_genes += len(gene_instances.keys())
except Exception as exception:
......
......@@ -26,7 +26,7 @@ class TestParseGene(BaseTestImportIGCGenes):
def setUp(self):
raw_data = [
'gene_id',
'gene_name',
'name',
'length',
'gene_completeness_status',
'cohort_origin',
......@@ -48,7 +48,7 @@ class TestParseGene(BaseTestImportIGCGenes):
This test should fail and needs to be updated when SELECTED_KEYS are changed
"""
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length',
'kegg_ko': ['kegg'],
'eggnog': ['eggnog'],
......@@ -64,7 +64,7 @@ class TestParseGene(BaseTestImportIGCGenes):
"""
selected_keys = ['gene_id', 'length']
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length'
}
tested_dict = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=selected_keys)
......@@ -76,7 +76,7 @@ class TestParseGene(BaseTestImportIGCGenes):
"""
selected_keys = ['gene_id', 'length', 'secret_code']
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length'
}
tested_dict = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=selected_keys)
......@@ -147,13 +147,13 @@ class TestCreateOrUpdateGenes(APITestCase, BaseTestImportIGCGenes):
def test_create_1_update_1(self):
gene_to_update = {
'gene_id': self.gene.gene_id,
'gene_name': 'Updated Gene',
'name': 'Updated Gene',
'length': 2235,
'taxonomy': self.taxo_list[0]
}
gene_to_create = {
'gene_id': 'gene-create-123',
'gene_name': 'Created Gene',
'name': 'Created Gene',
'length': 5629,
'taxonomy': self.taxo_list[1]
}
......
# Generated by Django 3.0.1 on 2019-12-30 10:31
from django.db.models.indexes import Index
from django.db import migrations, models
class Migration(migrations.Migration):
    """Rename ``Gene.gene_name`` to ``Gene.name`` and index ``Gene.length``."""

    # Applies on top of the previous catalog migration.
    dependencies = [('catalog', '0020_statistics')]

    operations = [
        # Column rename only; the field definition itself is untouched.
        migrations.RenameField(model_name='gene', old_name='gene_name', new_name='name'),
        # Redeclare ``length`` with db_index=True.
        migrations.AlterField(
            model_name='gene',
            name='length',
            field=models.PositiveIntegerField(db_index=True),
        ),
        # Explicitly named index on the same column.
        # NOTE(review): the AlterField above already sets db_index=True on
        # ``length``, so this second index looks redundant -- confirm that
        # both are intended before relying on either.
        migrations.AddIndex(
            model_name='Gene',
            index=Index(fields=['length'], name='length_index'),
        ),
    ]
......@@ -4,9 +4,9 @@ from .function import Function
class Gene(models.Model):
gene_name = models.CharField(max_length=100, unique=True)
name = models.CharField(max_length=100, unique=True)
gene_id = models.SlugField(max_length=100, db_index=True, unique=True)
length = models.PositiveIntegerField()
length = models.PositiveIntegerField(db_index=True)
functions = models.ManyToManyField(Function, through='GeneFunction')
taxonomy = models.ForeignKey(
'Taxonomy', related_name='genes',
......
......@@ -49,11 +49,6 @@ class GeneStatistics(Statistics):
queryset = self.get_queryset().only('length')
else:
queryset = self.get_queryset(filters=filters).distinct().only('length')
if not queryset:
return {
'counts': [],
'labels': []
}
length_max = queryset.aggregate(Max('length')).get('length__max', 0)
stop_at = length_max if length_max < stop_at else stop_at
all_ranges = [[i, i + window_size] for i in range(0, stop_at + 1, window_size)]
......
......@@ -69,7 +69,7 @@ class GeneSerializer(serializers.ModelSerializer):
class Meta:
model = Gene
list_serializer_class = GeneListSerializer
fields = ('gene_id', 'gene_name', 'length', 'functions', 'taxonomy')
fields = ('gene_id', 'name', 'length', 'functions', 'taxonomy')
def _extract_many_to_many(self, validated_data, info):
many_to_many = {}
......
......@@ -8,7 +8,7 @@ class TestIGCLineParser(TestCase):
def test_gene(self):
raw_data = [
'gene_id',
'gene_name',
'name',
'length',
'gene_completeness_status',
'cohort_origin',
......@@ -50,7 +50,7 @@ class TestIGCLineParser(TestCase):
def test_multiple_functions(self):
raw_data = [
'gene_id',
'gene_name',
'name',
'length',
'gene_completeness_status',
'cohort_origin',
......@@ -80,7 +80,7 @@ class TestIGCLineParser(TestCase):
def test_multiple_same_functions(self):
raw_data = [
'gene_id',
'gene_name',
'name',
'length',
'gene_completeness_status',
'cohort_origin',
......
......@@ -150,7 +150,7 @@ class ImportIGCGenes(object):
return clean_functions
def _clean_gene(self, gene_dict):
gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = [
{'source': 'kegg', 'function_id': v} for v in gene_dict.pop('kegg_ko') if v != 'unknown'] + \
......
......@@ -20,7 +20,7 @@ class TestParseGene(TestCase):
def setUp(self):
raw_data = [
'gene_id',
'gene_name',
'name',
'length',
'gene_completeness_status',
'cohort_origin',
......@@ -42,7 +42,7 @@ class TestParseGene(TestCase):
This test should fail and needs to be updated when SELECTED_KEYS are changed
"""
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length',
'kegg_ko': ['kegg'],
'eggnog': ['eggnog'],
......@@ -58,7 +58,7 @@ class TestParseGene(TestCase):
"""
selected_keys = ['gene_id', 'length']
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length'
}
tested_dict = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=selected_keys)
......@@ -70,7 +70,7 @@ class TestParseGene(TestCase):
"""
selected_keys = ['gene_id', 'length', 'secret_code']
expected_dict = {
'gene_id': 'gene_name',
'gene_id': 'name',
'length': 'length'
}
tested_dict = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=selected_keys)
......@@ -93,7 +93,7 @@ class TestCleanGene(TestCase):
def test_clean_gene(self):
expected_gene_dict = {
'gene_id': 'gene-01',
'gene_name': 'gene.01',
'name': 'gene.01',
'length': 135,
'functions': [
{'source': 'kegg', 'function_id': 'K00001'},
......@@ -107,7 +107,7 @@ class TestCleanGene(TestCase):
self.import_igc_genes.skip_functions = True
expected_gene_dict = {
'gene_id': 'gene-01',
'gene_name': 'gene.01',
'name': 'gene.01',
'length': 135,
}
test_gene_dict = self.import_igc_genes._clean_gene(self.gene_dict)
......@@ -122,7 +122,7 @@ class TestCleanGene(TestCase):
}
expected_gene_dict = {
'gene_id': 'gene-01',
'gene_name': 'gene.01',
'name': 'gene.01',
'functions': [{'function_id': 'COG1', 'source': 'eggnog'}],
'length': 135
}
......@@ -138,7 +138,7 @@ class TestCleanGene(TestCase):
}
expected_gene_dict = {
'gene_id': 'gene-01',
'gene_name': 'gene.01',
'name': 'gene.01',
'length': 135
}
test_gene_dict = self.import_igc_genes._clean_gene(gene_dict)
......
......@@ -86,7 +86,7 @@ export default {
},
{
title: 'Name',
content: response.data.gene_name,
content: response.data.name,
},
{
title: 'Length',
......
......@@ -20,7 +20,7 @@
>
<template v-slot:items="props">
<td><a :href="/gene-detail/ + props.item.gene_id">{{ props.item.gene_id }}</a></td>
<td>{{ props.item.gene_name }}</td>
<td>{{ props.item.name }}</td>
<td class="text-xs">{{ props.item.length }}</td>
<td class="text-xs"></td>
<td class="text-xs">{{ props.item.functions }}</td>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment