diff --git a/backend/metagenedb/api/catalog/views/gene.py b/backend/metagenedb/api/catalog/views/gene.py
index 57d9971146dbe294cb14b4d14922389640167ef5..dcf485ea771fd551a1be2c4b1240db40e34d0cdb 100644
--- a/backend/metagenedb/api/catalog/views/gene.py
+++ b/backend/metagenedb/api/catalog/views/gene.py
@@ -1,4 +1,4 @@
-from django_pandas.io import read_frame
+from django.db.models import Max
 from drf_yasg import openapi
 from drf_yasg.utils import swagger_auto_schema
 from marshmallow.exceptions import ValidationError
@@ -10,7 +10,6 @@ from metagenedb.apps.catalog.models import Gene
 from metagenedb.api.catalog.filters import GeneFilter
 from metagenedb.api.catalog.qparams_validators.gene import GeneLengthQueryParams, GeneQueryParams
 from metagenedb.apps.catalog.serializers import GeneSerializer
-from metagenedb.common.utils.df_operations import get_mask
 
 from .bulk_viewset import BulkViewSet
 
@@ -59,15 +58,16 @@ class GeneViewSet(BulkViewSet):
     def get_permissions(self):
         return super(self.__class__, self).get_permissions()
 
-    def _count_windows(self, df, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL, stop_at=DEFAULT_STOP_AT):
+    def _count_windows(self, queryset, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL,
+                       stop_at=DEFAULT_STOP_AT):
         """
-        Count how many line of the df belong to each windows defined by the window_size for the window_col
-        :param df:
+        Count the entries of the queryset falling in each window, issuing one COUNT query per window
+        :param queryset: queryset to count over; it is filtered on window_col for every window
         :param window_col: column concerned by the window
         :param window_size: size of the window
         :return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}
         """
-        length_max = df[window_col].max()
+        length_max = queryset.aggregate(max_value=Max(window_col))['max_value'] or 0  # None when empty
         stop_at = length_max if length_max < stop_at else stop_at
         all_ranges = [[i, i + window_size] for i in range(0, stop_at + 1, window_size)]
         all_ranges[-1][1] = length_max + 1  # last should contain all above the stop_at
@@ -75,7 +75,7 @@ class GeneViewSet(BulkViewSet):
         labels = []
         for rg in all_ranges:
             labels.append(f"{rg[0]/1000}k-{rg[1]/1000}k")
-            data.append(df[get_mask(df, rg, window_col)].count()[window_col])
+            data.append(queryset.filter(**{f"{window_col}__gte": rg[0], f"{window_col}__lt": rg[1]}).count())
         # Change labels
         labels[0] = f"<{labels[0].split('-')[1]}"
         labels[-1] = f">{labels[-1].split('-')[0]}"
@@ -105,12 +105,13 @@ class GeneViewSet(BulkViewSet):
         window_size = query_params.get('window_size', self.DEFAULT_WINDOW_SIZE)
         stop_at = query_params.get('stop_at', self.DEFAULT_STOP_AT)
 
-        df = read_frame(Gene.objects.all(), fieldnames=[self.GENE_LENGTH_COL])
-        if df.empty:
+        # Check for existence first so an empty catalog gets a 204 before any counting query runs
+        queryset = Gene.objects.all()
+        if not queryset.exists():
             return Response(
-                {'results': {}},
+                {},
                 status=HTTP_204_NO_CONTENT
             )
         return Response(
-            {'results': self._count_windows(df, window_size=window_size, stop_at=stop_at)}
+            {'results': self._count_windows(queryset, window_size=window_size, stop_at=stop_at)}
         )
diff --git a/backend/metagenedb/api/catalog/views/test_gene.py b/backend/metagenedb/api/catalog/views/test_gene.py
index 8811ae325653c0622ae4014a997c959b22eafbaa..24e2ee252eaa0e6239ea21fd90d94d9ae40a0564 100644
--- a/backend/metagenedb/api/catalog/views/test_gene.py
+++ b/backend/metagenedb/api/catalog/views/test_gene.py
@@ -1,4 +1,3 @@
-import pandas as pd
 from django.contrib.auth.models import User
 from django.test import TestCase
 from django.urls import reverse
@@ -6,7 +5,6 @@ from rest_framework import status
 from rest_framework.test import APITestCase
 from rest_framework_jwt.settings import api_settings
 
-from metagenedb.api.catalog.views.gene import GeneViewSet
 from metagenedb.apps.catalog.factory import GeneFactory
 from metagenedb.common.utils.mocks.metagenedb import MetageneDBCatalogGeneAPIMock
 
@@ -41,43 +39,17 @@ class TestGenes(TestCase):
         self.assertEqual(resp.status_code, status.HTTP_200_OK)
 
 
-class TestCountWindows(TestCase):
-
-    def setUp(self):
-        self.window_col = "length"
-        self.df = pd.DataFrame(
-            [22, 29, 35],
-            columns=[self.window_col]
-        )
-
-    def test_simple_count_window10(self):
-        expected_dict = {
-            'labels': ['<0.01k', '0.01k-0.02k', '0.02k-0.03k', '>0.03k'],
-            'counts': [0, 0, 2, 1]
-        }
-        geneviewset = GeneViewSet()
-        test_dict = geneviewset._count_windows(self.df, 10, window_col=self.window_col)
-        self.assertDictEqual(test_dict, expected_dict)
-
-    def test_simple_count_window10_stop20(self):
-        expected_dict = {
-            'labels': ['<0.01k', '0.01k-0.02k', '>0.02k'],
-            'counts': [0, 0, 3]
-        }
-        geneviewset = GeneViewSet()
-        test_dict = geneviewset._count_windows(self.df, window_size=10,
-                                               window_col=self.window_col, stop_at=20)
-        self.assertDictEqual(test_dict, expected_dict)
-
-
 class TestCountWindowsAPI(APITestCase):
 
     def setUp(self):
         self.gene_api = MetageneDBCatalogGeneAPIMock(self.client)
-        for i in range(2000, 4000, 350):
-            GeneFactory.create(length=i)
+
+    def test_gene_length_no_content(self):
+        self.assertFalse(self.gene_api.get_gene_length())
 
     def test_gene_length_api(self):
+        for i in range(2000, 4000, 350):
+            GeneFactory.create(length=i)
         expected_dict = {
             'results': {
                 'counts': [0, 0, 3, 3],
@@ -87,6 +59,8 @@ class TestCountWindowsAPI(APITestCase):
         self.assertDictEqual(self.gene_api.get_gene_length(), expected_dict)
 
     def test_gene_length_api_stop_at_2000(self):
+        for i in range(2000, 4000, 350):
+            GeneFactory.create(length=i)
         expected_dict = {
             'results': {
                 'counts': [0, 0, 6],
diff --git a/backend/metagenedb/common/utils/mocks/metagenedb.py b/backend/metagenedb/common/utils/mocks/metagenedb.py
index f9688ec50a2b414f6a855f63b88a6aeb2fa0b41e..8f39b15b95c8a352579b214d1c68a16d0be03cc1 100644
--- a/backend/metagenedb/common/utils/mocks/metagenedb.py
+++ b/backend/metagenedb/common/utils/mocks/metagenedb.py
@@ -52,6 +52,8 @@ class MetageneDBCatalogGeneAPIMock(MetageneDBAPIMock):
         response = self.client.get(reverse(reverse_path), params)
         if response.status_code in self.BAD_REQUESTS:
             raise HTTPError
+        if response.status_code == 204:  # no content
+            return {}
         return response.json()
 
 