Commit bd33850f authored by Kenzo-Hugo Hillion
Browse files

create csv qparam for genes and refactor cache for count

parent cf322915
Pipeline #32695 failed with stages
in 3 minutes and 23 seconds
......@@ -15,3 +15,4 @@ class GeneQueryParams(PaginatedQueryParams):
function = fields.String()
source = fields.String()
fasta = fields.Boolean()
csv = fields.Boolean()
......@@ -14,11 +14,12 @@ from metagenedb.apps.catalog.models import Gene
from metagenedb.api.catalog.filters import GeneFilter
from metagenedb.api.catalog.qparams_validators.gene import GeneQueryParams
from metagenedb.apps.catalog.serializers import GeneSerializer
from metagenedb.common.utils.cache import queryset_count_cached
from .base import BulkViewSet
# Upper bounds, read from Django settings, that the gene count of a query is
# compared against before a download response is built.
MAX_FASTA_GENES = settings.MAX_FASTA_GENES
MAX_DOWNLOAD_GENES = settings.MAX_DOWNLOAD_GENES
class GeneViewSet(BulkViewSet):
......@@ -29,27 +30,40 @@ class GeneViewSet(BulkViewSet):
lookup_field = 'gene_id'
# Return the count for `queryset`.
# NOTE(review): this span appears to interleave two versions of the body from a
# diff rendered without +/- markers: an inline MD5-keyed cache lookup, followed
# by a single delegation to queryset_count_cached(). Confirm which lines are
# current before relying on either path.
def _get_queryset_count(self, queryset):
# Cache key: MD5 hex digest of the queryset's SQL text.
hash_object = hashlib.md5(str(queryset.query).encode('utf-8'))
redis_key = hash_object.hexdigest()
if redis_key in cache:
return cache.get(redis_key)
else:
# Cache miss: count directly; this branch does not store the result.
return queryset.count()
# Delegate counting to the shared helper.
return queryset_count_cached(queryset)
@property
def too_many_genes_error_response(self):
    """Build the error response used when a query matches too many genes.

    A new ``Response`` is created on every access. Its body holds a single
    ``message`` entry naming the ``MAX_DOWNLOAD_GENES`` limit, and its status
    is ``HTTP_500_INTERNAL_SERVER_ERROR``.
    """
    # NOTE(review): the condition is caused by the request, so a 4xx status
    # may describe it better; the status is kept unchanged here.
    payload = {
        'message': f'Too many genes in the query, can obtain download up to {MAX_DOWNLOAD_GENES} genes.',
    }
    return Response(payload, status=HTTP_500_INTERNAL_SERVER_ERROR)
def _check_too_many_genes(self, queryset):
    """Tell whether ``queryset`` reaches the download limit.

    Returns ``True`` when the count obtained from ``_get_queryset_count`` is
    greater than or equal to ``MAX_DOWNLOAD_GENES``, ``False`` otherwise.
    """
    return self._get_queryset_count(queryset) >= MAX_DOWNLOAD_GENES
# Build a downloadable FASTA attachment from the filtered gene queryset, or
# return an error response when the query matches too many genes.
# NOTE(review): this span appears to hold two successive versions of the body
# from a diff rendered without +/- markers. The first checks MAX_FASTA_GENES
# inline and uses a plain StringIO; the second uses _check_too_many_genes(),
# too_many_genes_error_response and a context-managed StringIO. Confirm which
# lines are current.
def _build_fasta_response(self):
queryset = self.filter_queryset(self.get_queryset())
count = self._get_queryset_count(queryset)
if count >= MAX_FASTA_GENES:
error_message = f'Too many genes in the query, can obtain only up to {MAX_FASTA_GENES} fasta seq.'
return Response({'message': error_message}, status=HTTP_500_INTERNAL_SERVER_ERROR)
# Accumulate the FASTA text of every gene in an in-memory buffer.
fasta_file = StringIO()
for gene in queryset.iterator():
fasta_file.write(gene.fasta)
# generate the file
response = HttpResponse(fasta_file.getvalue(), content_type='text/fasta')
filename = 'metagenedb_sequences.fasta'
response['Content-Disposition'] = 'attachment; filename=%s' % filename
return response
# Second variant: same flow, with the shared limit check and error response.
if self._check_too_many_genes(queryset):
return self.too_many_genes_error_response
with StringIO() as fasta_file:
for gene in queryset.iterator():
fasta_file.write(gene.fasta)
# generate the file
response = HttpResponse(fasta_file.getvalue(), content_type='text/fasta')
filename = 'metagenedb_sequences.fasta'
response['Content-Disposition'] = 'attachment; filename=%s' % filename
return response
def _build_csv_response(self):
    """Return the CSV download response for the filtered gene queryset.

    When ``_check_too_many_genes`` reports that the query is too large, the
    ``too_many_genes_error_response`` is returned instead. Otherwise an
    ``HttpResponse`` with content type ``text/csv`` is returned as an
    attachment named ``metagenedb.csv``.
    """
    genes = self.filter_queryset(self.get_queryset())
    if self._check_too_many_genes(genes):
        return self.too_many_genes_error_response
    # NOTE(review): the buffer is opened but nothing is written to it yet; the
    # response body is the placeholder text "Incoming".
    with StringIO() as csv_file:
        # generate the file
        download_name = 'metagenedb.csv'
        response = HttpResponse("Incoming", content_type='text/csv')
        response['Content-Disposition'] = f'attachment; filename={download_name}'
        return response
@swagger_auto_schema(
tags=['Genes'],
......@@ -65,6 +79,8 @@ class GeneViewSet(BulkViewSet):
return Response(error_message, status=HTTP_422_UNPROCESSABLE_ENTITY)
if query_params.get('fasta', False) is True:
return self._build_fasta_response()
if query_params.get('csv', False) is True:
return self._build_csv_response()
return super().list(request, *args, **kwargs)
@swagger_auto_schema(
......
......@@ -8,7 +8,6 @@ class TestGenes(TestCase):
def test_get_genes_no_auth(self):
"""
Unauthenticated users should be able to access genes
@TODO make unaccessible
"""
url = reverse('api:catalog:v1:genes-list')
resp = self.client.get(url)
......
import hashlib
import inspect
from django.conf import settings
from django.core.cache import cache
from django.core.cache.backends.base import DEFAULT_TIMEOUT
from django.core.paginator import Paginator
from django.utils.functional import cached_property
from django.utils.inspect import method_has_no_args
CACHE_TTL = getattr(settings, 'CACHE_TTL', DEFAULT_TIMEOUT)
from metagenedb.common.utils.cache import queryset_count_cached
class CachedCountPaginator(Paginator):
......@@ -17,16 +9,4 @@ class CachedCountPaginator(Paginator):
# NOTE(review): this span appears to interleave two versions of the body from a
# diff rendered without +/- markers: an inline cache lookup and count, followed
# by a single delegation to queryset_count_cached(). Confirm which lines are
# current.
@cached_property
def count(self):
"""Return the total number of objects, across all pages."""
# Create hash from SQL query for REDIS cache
hash_object = hashlib.md5(str(self.object_list.query).encode('utf-8'))
redis_key = hash_object.hexdigest()
if redis_key in cache:
return cache.get(redis_key)
else:
# Use the object's own count() when it is a callable, non-builtin method
# that takes no arguments; otherwise fall back to len().
c = getattr(self.object_list, 'count', None)
if callable(c) and not inspect.isbuiltin(c) and method_has_no_args(c):
count = c()
else:
count = len(self.object_list)
# Store the computed count under the query hash for CACHE_TTL.
cache.set(redis_key, count, timeout=CACHE_TTL)
return count
# Delegate counting to the shared helper.
return queryset_count_cached(self.object_list)
from .count import queryset_count_cached # noqa
import hashlib
import inspect
from django.conf import settings
from django.core.cache import cache
from django.core.cache.backends.base import DEFAULT_TIMEOUT
from django.utils.inspect import method_has_no_args
CACHE_TTL = getattr(settings, 'CACHE_TTL', DEFAULT_TIMEOUT)
def queryset_count_cached(queryset):
    """Return the number of items in ``queryset``, going through the cache.

    The cache key is the MD5 hex digest of ``str(queryset.query)``. On a cache
    miss the count is computed and stored with a timeout of ``CACHE_TTL``.

    Args:
        queryset: Object exposing a ``query`` attribute. It is counted with
            its own ``count()`` when that is a callable, non-builtin method
            taking no arguments, and with ``len()`` otherwise.

    Returns:
        The cached count if one is stored, else the freshly computed count.
    """
    # Create hash from SQL query for REDIS cache
    sql_digest = hashlib.md5(str(queryset.query).encode('utf-8'))
    redis_key = sql_digest.hexdigest()
    # Read the cache once: a separate ``in`` check followed by ``get`` costs
    # two lookups and can yield None if the entry expires between them.
    cached_count = cache.get(redis_key)
    if cached_count is not None:
        return cached_count
    counter = getattr(queryset, 'count', None)
    if callable(counter) and not inspect.isbuiltin(counter) and method_has_no_args(counter):
        count = counter()
    else:
        count = len(queryset)
    cache.set(redis_key, count, timeout=CACHE_TTL)
    return count
......@@ -9,4 +9,4 @@ env = environ.Env()
environ.Env.read_env(root('.env')) # reading .env file
# Maximum number of genes able to retrieve through API in one download request.
# Parsed with env.int so that a value set in the environment is an integer,
# like the default, and can be compared with a queryset count.
MAX_FASTA_GENES = env.int('MAX_FASTA_GENES', default=100000)
MAX_DOWNLOAD_GENES = env.int('MAX_DOWNLOAD_GENES', default=100000)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment