From f0f3419eabf393abb3a43dd0e4633dddae1c5457 Mon Sep 17 00:00:00 2001 From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr> Date: Mon, 24 Jun 2019 14:49:28 +0200 Subject: [PATCH] add route to get gene length repartition --- .../metagenedb/apps/catalog/admin/admin.py | 28 ------------- backend/metagenedb/apps/catalog/urls.py | 1 + .../metagenedb/apps/catalog/views/__init__.py | 3 +- .../metagenedb/apps/catalog/views/genes.py | 8 ++-- .../apps/catalog/views/statistics.py | 42 +++++++++++++++++++ .../apps/catalog/views/test_statistics.py | 22 ++++++++++ 6 files changed, 72 insertions(+), 32 deletions(-) delete mode 100644 backend/metagenedb/apps/catalog/admin/admin.py create mode 100644 backend/metagenedb/apps/catalog/views/statistics.py create mode 100644 backend/metagenedb/apps/catalog/views/test_statistics.py diff --git a/backend/metagenedb/apps/catalog/admin/admin.py b/backend/metagenedb/apps/catalog/admin/admin.py deleted file mode 100644 index a923519..0000000 --- a/backend/metagenedb/apps/catalog/admin/admin.py +++ /dev/null @@ -1,28 +0,0 @@ -from django.contrib import admin - -from metagenedb.apps.catalog.models import Gene, Function, KeggOrthology - - -@admin.register(Gene) -class GeneAdmin(admin.ModelAdmin): - - list_display = ('gene_id', 'gene_length', 'get_functions') - search_fields = ('gene_id',) - - def get_functions(self, obj): - return ",".join([str(f) for f in obj.functions.all()]) - get_functions.short_description = 'Functions' - - -@admin.register(KeggOrthology) -class KeggOrthologyAdmin(admin.ModelAdmin): - - list_display = ('function_id', 'name', 'long_name', 'ec_number', 'source') - search_fields = ('function_id',) - - -@admin.register(Function) -class FunctionAdmin(admin.ModelAdmin): - - list_display = ('function_id', 'name', 'source') - search_fields = ('function_id',) diff --git a/backend/metagenedb/apps/catalog/urls.py b/backend/metagenedb/apps/catalog/urls.py index e6afebc..7f65d9d 100644 --- a/backend/metagenedb/apps/catalog/urls.py 
+++ b/backend/metagenedb/apps/catalog/urls.py @@ -5,4 +5,5 @@ from . import views urlpatterns = [ re_path(r'^api/genes/$', views.gene_list, name='genes'), re_path(r'^api/genes/(?P<gene_id>.*)$', views.gene_detail), + re_path(r'^api/gene_length$', views.gene_length, name='gene_length'), ] diff --git a/backend/metagenedb/apps/catalog/views/__init__.py b/backend/metagenedb/apps/catalog/views/__init__.py index 2f2e38f..492b576 100644 --- a/backend/metagenedb/apps/catalog/views/__init__.py +++ b/backend/metagenedb/apps/catalog/views/__init__.py @@ -1 +1,2 @@ -from .genes import gene_detail, gene_list # noqa \ No newline at end of file +from .genes import gene_detail, gene_list # noqa +from .statistics import gene_length # noqa \ No newline at end of file diff --git a/backend/metagenedb/apps/catalog/views/genes.py b/backend/metagenedb/apps/catalog/views/genes.py index f3c0823..9dde3cb 100644 --- a/backend/metagenedb/apps/catalog/views/genes.py +++ b/backend/metagenedb/apps/catalog/views/genes.py @@ -30,9 +30,11 @@ def gene_list(request): if data.has_previous(): previousPage = data.previous_page_number() - return Response({'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages, - 'nextlink': '/api/genes/?page=' + str(nextPage), - 'prevlink': '/api/genes/?page=' + str(previousPage)}) + return Response( + {'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages, + 'nextlink': '/api/genes/?page=' + str(nextPage), + 'prevlink': '/api/genes/?page=' + str(previousPage)} + ) @api_view(['GET']) diff --git a/backend/metagenedb/apps/catalog/views/statistics.py b/backend/metagenedb/apps/catalog/views/statistics.py new file mode 100644 index 0000000..2ac847d --- /dev/null +++ b/backend/metagenedb/apps/catalog/views/statistics.py @@ -0,0 +1,42 @@ +import pandas as pd +from rest_framework.decorators import api_view +from rest_framework.response import Response + +from metagenedb.apps.catalog.models import Gene + + +GENE_LENGTH_COL 
def _get_mask(df, rg, col_name):
    """
    Build a boolean mask selecting the rows whose value lies inside a window.

    :param df: DataFrame holding the column to filter
    :param rg: window as a (start, end) pair; start is included, end is excluded
    :param col_name: name of the column compared against the window
    :return: boolean Series, True where start <= df[col_name] < end
    """
    start, end = rg[0], rg[1]
    return (df[col_name] >= start) & (df[col_name] < end)


def _count_windows(df, window_col, window_size=10000):
    """
    Count how many rows of the df belong to each window of window_col.

    Windows start at 0 and are window_size wide: [0, window_size),
    [window_size, 2 * window_size), ... up to and including the window that
    contains the largest value of window_col.

    :param df: DataFrame containing window_col
    :param window_col: column concerned by the window
    :param window_size: size of the window, must be strictly positive
    :return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}, both lists being
             empty when df has no rows
    :raises ValueError: if window_size is not strictly positive
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    # A frame built from an empty list has no columns at all, so looking up
    # window_col would raise KeyError; there is nothing to count anyway.
    if df.empty:
        return {'data': [], 'labels': []}

    # range() excludes its stop value: add 1 so that a maximum sitting exactly
    # on a window boundary (e.g. 30 with window_size 10) still gets a window.
    upper_bound = int(df[window_col].max()) + 1

    data = []
    labels = []
    for start in range(0, upper_bound, window_size):
        rg = (start, start + window_size)
        labels.append(f"{rg[0]}-{rg[1]-1}")
        # Summing the boolean mask counts the matching rows for whichever
        # column was requested; int() turns the numpy scalar into a plain int.
        data.append(int(_get_mask(df, rg, window_col).sum()))
    return {
        'data': data,
        'labels': labels
    }