Commit f0f3419e authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion
Browse files

add route to get gene length repartition

parent 6356c972
Pipeline #12756 failed with stage
in 46 seconds
from django.contrib import admin
from metagenedb.apps.catalog.models import Gene, Function, KeggOrthology
@admin.register(Gene)
class GeneAdmin(admin.ModelAdmin):
    """Admin configuration for ``Gene`` entries.

    The change list shows the gene identifier, its length and a
    comma-separated summary of its functions; searching is done on
    ``gene_id``.
    """

    search_fields = ('gene_id',)
    list_display = ('gene_id', 'gene_length', 'get_functions')

    def get_functions(self, obj):
        """Return the functions linked to ``obj`` as one comma-separated string."""
        linked_functions = obj.functions.all()
        return ",".join(map(str, linked_functions))

    # Column header used for ``get_functions`` in the change list.
    get_functions.short_description = 'Functions'
@admin.register(KeggOrthology)
class KeggOrthologyAdmin(admin.ModelAdmin):
    """Admin configuration for ``KeggOrthology`` entries (searchable by ``function_id``)."""

    search_fields = ('function_id',)
    # Columns displayed in the change list, in order.
    list_display = (
        'function_id',
        'name',
        'long_name',
        'ec_number',
        'source',
    )
@admin.register(Function)
class FunctionAdmin(admin.ModelAdmin):
    """Admin configuration for ``Function`` entries (searchable by ``function_id``)."""

    search_fields = ('function_id',)
    # Columns displayed in the change list, in order.
    list_display = (
        'function_id',
        'name',
        'source',
    )
......@@ -5,4 +5,5 @@ from . import views
# URL routes exposed by this app; each pattern maps to a function in ``views``.
urlpatterns = [
# Gene collection endpoint.
re_path(r'^api/genes/$', views.gene_list, name='genes'),
# Single-gene endpoint: everything after "api/genes/" is captured as ``gene_id``.
re_path(r'^api/genes/(?P<gene_id>.*)$', views.gene_detail),
# Gene length statistics endpoint (note: the pattern has no trailing slash).
re_path(r'^api/gene_length$', views.gene_length, name='gene_length'),
]
from .genes import gene_detail, gene_list # noqa
\ No newline at end of file
from .genes import gene_detail, gene_list # noqa
from .statistics import gene_length # noqa
\ No newline at end of file
......@@ -30,9 +30,11 @@ def gene_list(request):
if data.has_previous():
previousPage = data.previous_page_number()
return Response({'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages,
'nextlink': '/api/genes/?page=' + str(nextPage),
'prevlink': '/api/genes/?page=' + str(previousPage)})
return Response(
{'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages,
'nextlink': '/api/genes/?page=' + str(nextPage),
'prevlink': '/api/genes/?page=' + str(previousPage)}
)
@api_view(['GET'])
......
import pandas as pd
from rest_framework.decorators import api_view
from rest_framework.response import Response
from metagenedb.apps.catalog.models import Gene
GENE_LENGTH_COL = 'gene_length'
def _get_mask(df, rg, col_name):
    """
    Build a boolean mask selecting the rows of ``df`` whose ``col_name``
    value lies in the half-open interval ``[rg[0], rg[1])``.

    :param df: DataFrame holding the column to test
    :param rg: pair (start, end); start is inclusive, end is exclusive
    :param col_name: name of the column compared against the bounds
    :return: boolean Series aligned with ``df``
    """
    column = df[col_name]
    at_or_above_start = column >= rg[0]
    below_end = column < rg[1]
    return at_or_above_start & below_end
def _count_windows(df, window_col, window_size=10000):
    """
    Count how many rows of the df belong to each window of ``window_size``
    along the ``window_col`` column.

    Windows are half-open intervals ``[start, start + window_size)`` starting
    at 0 and extending far enough to include the maximum value of the column.
    Missing values are ignored. An empty column yields empty lists.

    :param df: DataFrame containing ``window_col``
    :param window_col: column concerned by the window
    :param window_size: size of the window
    :return: {'data': COUNTS_BY_WINDOW, 'labels': START-END} where each label
             is "start-end" with an inclusive end (e.g. "0-9" for size 10)
    """
    values = df[window_col].dropna()
    if values.empty:
        # No value to bin: max() would be NaN and range() would fail.
        return {'data': [], 'labels': []}

    # "+ 1" makes the stop bound inclusive of the maximum, so a maximum that
    # is an exact multiple of window_size still gets its own window.
    upper_bound = int(values.max()) + 1

    data = []
    labels = []
    for start in range(0, upper_bound, window_size):
        end = start + window_size
        labels.append(f"{start}-{end - 1}")
        in_window = (values >= start) & (values < end)
        # Plain int rather than a numpy scalar, so the result serializes cleanly.
        data.append(int(in_window.sum()))
    return {
        'data': data,
        'labels': labels
    }
@api_view(['GET'])
def gene_length(request):
    """
    Return the distribution of gene lengths, counted by window.

    The response body is ``{'data': {'data': [...], 'labels': [...]}}`` as
    produced by ``_count_windows`` with its default window size.
    """
    rows = list(Gene.objects.all().values(GENE_LENGTH_COL))
    if not rows:
        # A DataFrame built from an empty list has no columns, so looking up
        # the gene length column would raise KeyError; answer with an empty
        # distribution instead.
        return Response({'data': {'data': [], 'labels': []}})
    df = pd.DataFrame(rows)
    return Response({
        'data': _count_windows(df, GENE_LENGTH_COL)
    })
import pandas as pd
from unittest import TestCase
from .statistics import _count_windows
class TestCountWindows(TestCase):
    """Unit tests for the ``_count_windows`` helper."""

    def setUp(self):
        # Three lengths: two fall in the 20-29 window, one in the 30-39 window.
        self.window_col = "gene_length"
        self.df = pd.DataFrame({self.window_col: [22, 29, 35]})

    def test_simple_count_window10(self):
        """Windows of size 10 give one count per decade up to the maximum."""
        labels = ['0-9', '10-19', '20-29', '30-39']
        counts = [0, 0, 2, 1]
        expected_dict = {'labels': labels, 'data': counts}
        test_dict = _count_windows(self.df, self.window_col, 10)
        self.assertDictEqual(test_dict, expected_dict)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment