gene.py 4.7 KB
Newer Older
1
from django_pandas.io import read_frame
2
3
from drf_yasg import openapi
from drf_yasg.utils import swagger_auto_schema
4
5
from marshmallow.exceptions import ValidationError
from rest_framework import filters
6
7
from rest_framework.decorators import action
from rest_framework.response import Response
8
from rest_framework.status import HTTP_204_NO_CONTENT, HTTP_422_UNPROCESSABLE_ENTITY
Kenzo-Hugo Hillion's avatar
Kenzo-Hugo Hillion committed
9

10
from metagenedb.apps.catalog.models import Gene
11
from metagenedb.api.catalog.qparams_validators.gene import GeneLengthQueryParams
12
from metagenedb.apps.catalog.serializers import GeneSerializer
13
from metagenedb.common.utils.df_operations import get_mask
14

15
16
from .bulk_viewset import BulkViewSet

17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
class DocGeneLength:
    """
    Define the OpenAPI objects documenting the gene length distribution endpoint.

    Example of a documented response body:

    {
        "results": {
            "counts": [0, 887, 12],
            "labels": ["<1.0k", "1.0k-2.0k", ">2.0k"]
        }
    }
    """
    # NOTE(review): the documented default (10000) differs from GeneViewSet.DEFAULT_WINDOW_SIZE (1000);
    # confirm which value is effectively applied before changing either one.
    window_size_param = openapi.Parameter('window_size', in_=openapi.IN_QUERY, description='Size of the window.',
                                          type=openapi.TYPE_INTEGER, default=10000)
    # Use the drf_yasg TYPE_* constants: "int" and "char" are not valid OpenAPI types
    # (the valid ones are "integer" and "string").
    counts = openapi.Schema(type=openapi.TYPE_ARRAY, items=openapi.Schema(type=openapi.TYPE_INTEGER),
                            description="Counts for every window_size")
    labels = openapi.Schema(type=openapi.TYPE_ARRAY, items=openapi.Schema(type=openapi.TYPE_STRING),
                            description="Corresponding windows")
    results = openapi.Schema(type=openapi.TYPE_OBJECT, properties={'counts': counts, 'labels': labels},
                             description="results of your request")
    gene_length_schema = openapi.Schema(type=openapi.TYPE_OBJECT, properties={'results': results})
    gene_length_response = openapi.Response('Get the distribution of gene length for a given window size',
                                            schema=gene_length_schema)
46
47


48
class GeneViewSet(BulkViewSet):
    """
    View set serving Gene objects (looked up by `gene_id`, searchable on `gene_name`)
    and exposing the gene length distribution through the `gene_length` action.
    """
    search_fields = ['gene_name']
    filter_backends = (filters.SearchFilter,)
    queryset = Gene.objects.select_related('taxonomy').prefetch_related('functions').all()
    serializer_class = GeneSerializer
    lookup_field = 'gene_id'
    GENE_LENGTH_COL = 'length'  # column of the data frame holding the gene length
    DEFAULT_WINDOW_SIZE = 1000
    DEFAULT_STOP_AT = 10000

    def get_permissions(self):
        """
        Return the permissions defined by the parent view set.
        """
        # Zero-argument super(): `super(self.__class__, self)` recurses forever
        # as soon as this class gets subclassed.
        return super().get_permissions()

    def _count_windows(self, df, window_size=DEFAULT_WINDOW_SIZE, window_col=GENE_LENGTH_COL, stop_at=DEFAULT_STOP_AT):
        """
        Count how many lines of the df belong to each window defined by the window_size for the window_col
        :param df: data frame containing the window_col column
        :param window_size: size of the window
        :param window_col: column concerned by the window
        :param stop_at: value from which all remaining lines are gathered in the last window
        :return: {'counts': COUNTS_BY_WINDOW, 'labels': LABELS_BY_WINDOW}
        """
        length_max = df[window_col].max()
        # No need to build windows above the highest value found in the column
        stop_at = length_max if length_max < stop_at else stop_at
        all_ranges = [[start, start + window_size] for start in range(0, stop_at + 1, window_size)]
        all_ranges[-1][1] = length_max + 1  # last should contain all above the stop_at

        counts = []
        labels = []
        for rg in all_ranges:
            labels.append(f"{rg[0]/1000}k-{rg[1]/1000}k")
            # Cast to a built-in int so the payload does not carry numpy scalars
            counts.append(int(df[get_mask(df, rg, window_col)].count()[window_col]))

        # First and last windows are open-ended. With a single window both rewrites
        # would hit the same label (giving e.g. "><0.5k"), so keep the plain range instead.
        if len(labels) > 1:
            labels[0] = f"<{all_ranges[0][1]/1000}k"
            labels[-1] = f">{all_ranges[-1][0]/1000}k"

        return {
            'counts': counts,
            'labels': labels
        }

    @swagger_auto_schema(
        manual_parameters=[DocGeneLength.window_size_param],
        responses={
            '200': DocGeneLength.gene_length_response,
            '204': 'No genes on the catalog to build the distribution'
        },
        operation_id='Gene length distribution',
    )
    @action(methods=['get'], detail=False)
    def gene_length(self, request):
        """
        Return the distribution of gene length over windows of `window_size`.

        Query parameters are validated through GeneLengthQueryParams; a validation error is
        answered with a 422 listing the error messages and the allowed query parameters.
        An empty catalog is answered with a 204.
        """
        params_schema = GeneLengthQueryParams()
        try:
            query_params = params_schema.load(request.query_params)
        except ValidationError as validation_error:
            error_message = validation_error.normalized_messages()
            error_message.update({
                'allowed_query_params': ', '.join(params_schema.declared_fields)
            })
            return Response(error_message, status=HTTP_422_UNPROCESSABLE_ENTITY)

        window_size = query_params.get('window_size', self.DEFAULT_WINDOW_SIZE)
        stop_at = query_params.get('stop_at', self.DEFAULT_STOP_AT)
        # Only the length column is loaded to build the distribution
        df = read_frame(Gene.objects.all(), fieldnames=[self.GENE_LENGTH_COL])
        if df.empty:
            # NOTE(review): a 204 is defined as carrying no body, so clients may never see this payload.
            return Response(
                {'results': {}},
                status=HTTP_204_NO_CONTENT
            )
        return Response(
            {'results': self._count_windows(df, window_size=window_size, stop_at=stop_at)}
        )