From f0f3419eabf393abb3a43dd0e4633dddae1c5457 Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Mon, 24 Jun 2019 14:49:28 +0200
Subject: [PATCH] add route to get gene length repartition

---
 .../metagenedb/apps/catalog/admin/admin.py    | 28 -------------
 backend/metagenedb/apps/catalog/urls.py       |  1 +
 .../metagenedb/apps/catalog/views/__init__.py |  3 +-
 .../metagenedb/apps/catalog/views/genes.py    |  8 ++--
 .../apps/catalog/views/statistics.py          | 42 +++++++++++++++++++
 .../apps/catalog/views/test_statistics.py     | 22 ++++++++++
 6 files changed, 72 insertions(+), 32 deletions(-)
 delete mode 100644 backend/metagenedb/apps/catalog/admin/admin.py
 create mode 100644 backend/metagenedb/apps/catalog/views/statistics.py
 create mode 100644 backend/metagenedb/apps/catalog/views/test_statistics.py

diff --git a/backend/metagenedb/apps/catalog/admin/admin.py b/backend/metagenedb/apps/catalog/admin/admin.py
deleted file mode 100644
index a923519..0000000
--- a/backend/metagenedb/apps/catalog/admin/admin.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from django.contrib import admin
-
-from metagenedb.apps.catalog.models import Gene, Function, KeggOrthology
-
-
-@admin.register(Gene)
-class GeneAdmin(admin.ModelAdmin):
-
-    list_display = ('gene_id', 'gene_length', 'get_functions')
-    search_fields = ('gene_id',)
-
-    def get_functions(self, obj):
-        return ",".join([str(f) for f in obj.functions.all()])
-    get_functions.short_description = 'Functions'
-
-
-@admin.register(KeggOrthology)
-class KeggOrthologyAdmin(admin.ModelAdmin):
-
-    list_display = ('function_id', 'name', 'long_name', 'ec_number', 'source')
-    search_fields = ('function_id',)
-
-
-@admin.register(Function)
-class FunctionAdmin(admin.ModelAdmin):
-
-    list_display = ('function_id', 'name', 'source')
-    search_fields = ('function_id',)
diff --git a/backend/metagenedb/apps/catalog/urls.py b/backend/metagenedb/apps/catalog/urls.py
index e6afebc..7f65d9d 100644
--- a/backend/metagenedb/apps/catalog/urls.py
+++ b/backend/metagenedb/apps/catalog/urls.py
@@ -5,4 +5,5 @@ from . import views
 urlpatterns = [
     re_path(r'^api/genes/$', views.gene_list, name='genes'),
     re_path(r'^api/genes/(?P<gene_id>.*)$', views.gene_detail),
+    re_path(r'^api/gene_length$', views.gene_length, name='gene_length'),
 ]
diff --git a/backend/metagenedb/apps/catalog/views/__init__.py b/backend/metagenedb/apps/catalog/views/__init__.py
index 2f2e38f..492b576 100644
--- a/backend/metagenedb/apps/catalog/views/__init__.py
+++ b/backend/metagenedb/apps/catalog/views/__init__.py
@@ -1 +1,2 @@
-from .genes import gene_detail, gene_list  # noqa
\ No newline at end of file
+from .genes import gene_detail, gene_list  # noqa
+from .statistics import gene_length  # noqa
\ No newline at end of file
diff --git a/backend/metagenedb/apps/catalog/views/genes.py b/backend/metagenedb/apps/catalog/views/genes.py
index f3c0823..9dde3cb 100644
--- a/backend/metagenedb/apps/catalog/views/genes.py
+++ b/backend/metagenedb/apps/catalog/views/genes.py
@@ -30,9 +30,11 @@ def gene_list(request):
     if data.has_previous():
         previousPage = data.previous_page_number()
 
-    return Response({'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages,
-                     'nextlink': '/api/genes/?page=' + str(nextPage),
-                     'prevlink': '/api/genes/?page=' + str(previousPage)})
+    return Response(
+        {'data': serializer.data, 'count': paginator.count, 'numpages': paginator.num_pages,
+         'nextlink': '/api/genes/?page=' + str(nextPage),
+         'prevlink': '/api/genes/?page=' + str(previousPage)}
+    )
 
 
 @api_view(['GET'])
diff --git a/backend/metagenedb/apps/catalog/views/statistics.py b/backend/metagenedb/apps/catalog/views/statistics.py
new file mode 100644
index 0000000..2ac847d
--- /dev/null
+++ b/backend/metagenedb/apps/catalog/views/statistics.py
@@ -0,0 +1,42 @@
+import pandas as pd
+from rest_framework.decorators import api_view
+from rest_framework.response import Response
+
+from metagenedb.apps.catalog.models import Gene
+
+
+GENE_LENGTH_COL = 'gene_length'
+
+
+def _get_mask(df, rg, col_name):
+    """Boolean mask of the df rows whose col_name value lies in the half-open range rg, e.g. (10, 20)."""
+    column = df[col_name]
+    at_or_above_start = column >= rg[0]
+    return at_or_above_start & (column < rg[1])
+
+
+def _count_windows(df, window_col, window_size=10000):
+    """
+    Count how many rows of the df belong to each window of window_size along window_col
+    :param df: pandas DataFrame containing window_col
+    :param window_col: column concerned by the window
+    :param window_size: size of the window
+    :return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}, both empty when df has no rows
+    """
+    data, labels = [], []
+    if window_col not in df or df.empty:
+        return {'data': data, 'labels': labels}
+    # max() + 1: a maximum sitting exactly on a window boundary still needs its own window
+    for start in range(0, df[window_col].max() + 1, window_size):
+        rg = (start, start + window_size)
+        labels.append(f"{rg[0]}-{rg[1]-1}")
+        # count on window_col (not a hard-coded name) so any column can be windowed
+        data.append(df[_get_mask(df, rg, window_col)].count()[window_col])
+    return {'data': data, 'labels': labels}
+
+@api_view(['GET'])
+def gene_length(request):
+    """Return the gene count per gene-length window as {'data': {'data': [...], 'labels': [...]}}."""
+    length_rows = list(Gene.objects.all().values(GENE_LENGTH_COL))
+    windows = _count_windows(pd.DataFrame(length_rows), GENE_LENGTH_COL)
+    return Response({'data': windows})
diff --git a/backend/metagenedb/apps/catalog/views/test_statistics.py b/backend/metagenedb/apps/catalog/views/test_statistics.py
new file mode 100644
index 0000000..6411aba
--- /dev/null
+++ b/backend/metagenedb/apps/catalog/views/test_statistics.py
@@ -0,0 +1,22 @@
+import pandas as pd
+from unittest import TestCase
+
+from .statistics import _count_windows
+
+
+class TestCountWindows(TestCase):
+    """Unit tests for the _count_windows helper."""
+
+    def setUp(self):
+        # Three lengths: two fall in the 20-29 window, one in the 30-39 window.
+        self.window_col = "gene_length"
+        self.df = pd.DataFrame({self.window_col: [22, 29, 35]})
+
+    def test_simple_count_window10(self):
+        """With a window size of 10, the empty leading windows are still reported with a count of 0."""
+        expected_dict = {
+            'labels': ['0-9', '10-19', '20-29', '30-39'],
+            'data': [0, 0, 2, 1]
+        }
+        observed = _count_windows(self.df, self.window_col, 10)
+        self.assertDictEqual(observed, expected_dict)
-- 
GitLab