From 8cfa5c1eb2706a89479ff7c9d27fb61b6af92e59 Mon Sep 17 00:00:00 2001
From: Kenzo-Hugo Hillion <kenzo-hugo.hillion1@pasteur.fr>
Date: Wed, 27 Nov 2019 15:48:13 +0100
Subject: [PATCH] Add profiling tool for performance issues

---
 backend/Pipfile                              |  1 +
 backend/Pipfile.lock                         | 32 ++++++++++++--------
 backend/metagenedb/api/catalog/views/gene.py | 14 +++++----
 backend/metagenedb/common/utils/profiling.py | 16 ++++++++++
 4 files changed, 45 insertions(+), 18 deletions(-)
 create mode 100644 backend/metagenedb/common/utils/profiling.py

diff --git a/backend/Pipfile b/backend/Pipfile
index e447a70..182c8ba 100644
--- a/backend/Pipfile
+++ b/backend/Pipfile
@@ -58,6 +58,7 @@ packaging = "*"
 python-slugify = "*"
 master = {git = "https://github.com/khillion/bioapi.git"}
 marshmallow = "*"
+django-pandas = "*"
 
 [requires]
 python_version = "3.7"
diff --git a/backend/Pipfile.lock b/backend/Pipfile.lock
index 176351d..46cc1ba 100644
--- a/backend/Pipfile.lock
+++ b/backend/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "5998b6b97448fd635cc1b05787de28ac5ac3344ca6b8055831fca13790fc3f33"
+            "sha256": "4be3394e3c4abe5fc7b75328ef912eaba09e15365322b7493e256f2def2ff013"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -86,6 +86,14 @@
             "index": "pypi",
             "version": "==2.2.0"
         },
+        "django-pandas": {
+            "hashes": [
+                "sha256:738cc03ffb411eef3eb02334d1f5a5d40697099a92ac59eb39629c08a9c2d6fb",
+                "sha256:788f4652012a67d2c5849191b01af58255f7af815ab612bebca019854235a9bc"
+            ],
+            "index": "pypi",
+            "version": "==0.6.1"
+        },
         "djangorestframework": {
             "hashes": [
                 "sha256:5488aed8f8df5ec1d70f04b2114abc52ae6729748a176c453313834a9ee179c8",
@@ -812,11 +820,11 @@
         },
         "pluggy": {
             "hashes": [
-                "sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6",
-                "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34"
+                "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
+                "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
             ],
             "index": "pypi",
-            "version": "==0.13.0"
+            "version": "==0.13.1"
         },
         "prometheus-client": {
             "hashes": [
@@ -866,10 +874,10 @@
         },
         "pygments": {
             "hashes": [
-                "sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127",
-                "sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297"
+                "sha256:83ec6c6133ca6b529b7ff5aa826328fd14b5bb02a58c37f4f06384e96a0f94ab",
+                "sha256:b7949de3d396836085fea596998b135a22610bbcc4f2abfe9e448e44cbc58388"
             ],
-            "version": "==2.4.2"
+            "version": "==2.5.1"
         },
         "pylint": {
             "hashes": [
@@ -888,17 +896,17 @@
         },
         "pyrsistent": {
             "hashes": [
-                "sha256:eb6545dbeb1aa69ab1fb4809bfbf5a8705e44d92ef8fc7c2361682a47c46c778"
+                "sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b"
             ],
-            "version": "==0.15.5"
+            "version": "==0.15.6"
         },
         "pytest": {
             "hashes": [
-                "sha256:8e256fe71eb74e14a4d20a5987bb5e1488f0511ee800680aaedc62b9358714e8",
-                "sha256:ff0090819f669aaa0284d0f4aad1a6d9d67a6efdc6dd4eb4ac56b704f890a0d6"
+                "sha256:63344a2e3bce2e4d522fd62b4fdebb647c019f1f9e4ca075debbd13219db4418",
+                "sha256:f67403f33b2b1d25a6756184077394167fe5e2f9d8bdaab30707d19ccec35427"
             ],
             "index": "pypi",
-            "version": "==5.2.4"
+            "version": "==5.3.1"
         },
         "pytest-cov": {
             "hashes": [
diff --git a/backend/metagenedb/api/catalog/views/gene.py b/backend/metagenedb/api/catalog/views/gene.py
index 15d63f9..ada0b5e 100644
--- a/backend/metagenedb/api/catalog/views/gene.py
+++ b/backend/metagenedb/api/catalog/views/gene.py
@@ -1,4 +1,5 @@
 import pandas as pd
+from django_pandas.io import read_frame
 from drf_yasg import openapi
 from drf_yasg.utils import swagger_auto_schema
 from marshmallow.exceptions import ValidationError
@@ -7,10 +8,10 @@ from rest_framework.decorators import action
 from rest_framework.response import Response
 from rest_framework.status import HTTP_204_NO_CONTENT, HTTP_422_UNPROCESSABLE_ENTITY
 
-from metagenedb.common.utils.df_operations import get_mask
 from metagenedb.apps.catalog.models import Gene
 from metagenedb.api.catalog.qparams_validators.gene import GeneLengthQueryParams
 from metagenedb.apps.catalog.serializers import GeneSerializer
+from metagenedb.common.utils.df_operations import get_mask
 
 from .bulk_viewset import BulkViewSet
 
@@ -52,11 +53,12 @@ class GeneViewSet(BulkViewSet):
     serializer_class = GeneSerializer
     lookup_field = 'gene_id'
     GENE_LENGTH_COL = 'length'
+    DEFAULT_WINDOW_SIZE = 10000
 
     def get_permissions(self):
         return super(self.__class__, self).get_permissions()
 
-    def _count_windows(self, df, window_size=10000, window_col=GENE_LENGTH_COL):
+    def _count_windows(self, df, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL):
         """
         Count how many line of the df belong to each windows defined by the window_size for the window_col
         :param df:
@@ -84,7 +86,7 @@ class GeneViewSet(BulkViewSet):
         operation_id='Gene length distribution',
     )
     @action(methods=['get'], detail=False)
-    def gene_length(self, request, window_size=10000):
+    def gene_length(self, request):
         try:
             query_params = GeneLengthQueryParams().load(request.query_params)
         except ValidationError as validation_error:
@@ -93,9 +95,9 @@ class GeneViewSet(BulkViewSet):
                 'allowed_query_params': ', '.join(GeneLengthQueryParams().declared_fields.keys())
             })
             return Response(error_message, status=HTTP_422_UNPROCESSABLE_ENTITY)
-        if 'window_size' in query_params:
-            window_size = query_params.get('window_size')
-        df = pd.DataFrame(list(self.queryset.values(self.GENE_LENGTH_COL)))
+
+        window_size = query_params.get('window_size', self.DEFAULT_WINDOW_SIZE)
+        df = read_frame(self.queryset.values(self.GENE_LENGTH_COL))
         if df.empty:
             return Response(
                 {'results': {}},
diff --git a/backend/metagenedb/common/utils/profiling.py b/backend/metagenedb/common/utils/profiling.py
new file mode 100644
index 0000000..e1c6d3e
--- /dev/null
+++ b/backend/metagenedb/common/utils/profiling.py
@@ -0,0 +1,16 @@
+import cProfile
+import functools
+
+
+def profile(file_path):
+    """Return a decorator that profiles each call and dumps cProfile stats to `file_path`."""
+    def decorator_profile(func):
+        @functools.wraps(func)
+        def wrapper_profile(*args, **kwargs):
+            cp = cProfile.Profile()
+            try:
+                return cp.runcall(func, *args, **kwargs)  # enables, calls func, always disables
+            finally:
+                cp.dump_stats(file_path)  # runs even if func raised; overwrites file_path
+        return wrapper_profile
+    return decorator_profile
-- 
GitLab