Commit ac12a76b authored by Kenzo-Hugo Hillion
Browse files

Improve route for gene_length

parent b2f4f373
Pipeline #18912 passed with stages in 2 minutes and 18 seconds
......@@ -3,3 +3,4 @@ from marshmallow import Schema, fields
class GeneLengthQueryParams(Schema):
window_size = fields.Integer()
stop_at = fields.Integer()
import pandas as pd
from django_pandas.io import read_frame
from drf_yasg import openapi
from drf_yasg.utils import swagger_auto_schema
......@@ -53,12 +52,13 @@ class GeneViewSet(BulkViewSet):
serializer_class = GeneSerializer
lookup_field = 'gene_id'
GENE_LENGTH_COL = 'length'
DEFAULT_WINDOW_SIZE = 10000
DEFAULT_WINDOW_SIZE = 1000
DEFAULT_STOP_AT = 10000
def get_permissions(self):
return super(self.__class__, self).get_permissions()
def _count_windows(self, df, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL):
def _count_windows(self, df, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL, stop_at=DEFAULT_STOP_AT):
"""
Count how many lines of the df belong to each window defined by the window_size for the window_col
:param df:
......@@ -66,12 +66,18 @@ class GeneViewSet(BulkViewSet):
:param window_size: size of the window
:return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}
"""
all_ranges = [(i, i + window_size) for i in range(0, df[window_col].max(), window_size)]
length_max = df[window_col].max()
stop_at = length_max if length_max < stop_at else stop_at
all_ranges = [[i, i + window_size] for i in range(0, stop_at + 1, window_size)]
all_ranges[-1][1] = length_max + 1 # last should contain all above the stop_at
data = []
labels = []
for rg in all_ranges:
labels.append(f"{rg[0]}-{rg[1]-1}")
labels.append(f"{rg[0]/1000}k-{rg[1]/1000}k")
data.append(df[get_mask(df, rg, window_col)].count()[window_col])
# Change labels
labels[0] = f"<{labels[0].split('-')[1]}"
labels[-1] = f">{labels[-1].split('-')[0]}"
return {
'counts': data,
'labels': labels
......@@ -97,12 +103,13 @@ class GeneViewSet(BulkViewSet):
return Response(error_message, status=HTTP_422_UNPROCESSABLE_ENTITY)
window_size = query_params.get('window_size', self.DEFAULT_WINDOW_SIZE)
df = read_frame(self.queryset.values(self.GENE_LENGTH_COL))
stop_at = query_params.get('stop_at', self.DEFAULT_STOP_AT)
df = read_frame(Gene.objects.all(), fieldnames=[self.GENE_LENGTH_COL])
if df.empty:
return Response(
{'results': {}},
status=HTTP_204_NO_CONTENT
)
return Response(
{'results': self._count_windows(df, window_size)}
{'results': self._count_windows(df, window_size=window_size, stop_at=stop_at)}
)
......@@ -50,9 +50,19 @@ class TestCountWindows(TestCase):
def test_simple_count_window10(self):
expected_dict = {
'labels': ['0-9', '10-19', '20-29', '30-39'],
'labels': ['<0.01k', '0.01k-0.02k', '0.02k-0.03k', '>0.03k'],
'counts': [0, 0, 2, 1]
}
geneviewset = GeneViewSet()
test_dict = geneviewset._count_windows(self.df, 10, window_col=self.window_col)
self.assertDictEqual(test_dict, expected_dict)
def test_simple_count_window10_stop20(self):
expected_dict = {
'labels': ['<0.01k', '0.01k-0.02k', '>0.02k'],
'counts': [0, 0, 3]
}
geneviewset = GeneViewSet()
test_dict = geneviewset._count_windows(self.df, window_size=10,
window_col=self.window_col, stop_at=20)
self.assertDictEqual(test_dict, expected_dict)
......@@ -35,7 +35,7 @@ export default {
data() {
return {
geneLengthData: {},
geneLengthWindowSize: 10000,
geneLengthWindowSize: 1000,
};
},
mounted() {
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment