Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
bd33850f
Commit
bd33850f
authored
Jun 22, 2020
by
Kenzo-Hugo Hillion
♻
Browse files
create csv qparam for genes and refactor cache for count
parent
cf322915
Pipeline
#32695
failed with stages
in 3 minutes and 23 seconds
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/api/catalog/qparams_validators/gene.py
View file @
bd33850f
...
...
@@ -15,3 +15,4 @@ class GeneQueryParams(PaginatedQueryParams):
function
=
fields
.
String
()
source
=
fields
.
String
()
fasta
=
fields
.
Boolean
()
csv
=
fields
.
Boolean
()
backend/metagenedb/api/catalog/views/gene.py
View file @
bd33850f
...
...
@@ -14,11 +14,12 @@ from metagenedb.apps.catalog.models import Gene
from
metagenedb.api.catalog.filters
import
GeneFilter
from
metagenedb.api.catalog.qparams_validators.gene
import
GeneQueryParams
from
metagenedb.apps.catalog.serializers
import
GeneSerializer
from
metagenedb.common.utils.cache
import
queryset_count_cached
from
.base
import
BulkViewSet
MAX_
FASTA
_GENES
=
settings
.
MAX_
FASTA
_GENES
MAX_
DOWNLOAD
_GENES
=
settings
.
MAX_
DOWNLOAD
_GENES
class
GeneViewSet
(
BulkViewSet
):
...
...
@@ -29,27 +30,40 @@ class GeneViewSet(BulkViewSet):
lookup_field
=
'gene_id'
def
_get_queryset_count
(
self
,
queryset
):
hash_object
=
hashlib
.
md5
(
str
(
queryset
.
query
).
encode
(
'utf-8'
))
redis_key
=
hash_object
.
hexdigest
()
if
redis_key
in
cache
:
return
cache
.
get
(
redis_key
)
else
:
return
queryset
.
count
()
return
queryset_count_cached
(
queryset
)
@
property
def
too_many_genes_error_response
(
self
):
error_message
=
f
'Too many genes in the query, can obtain download up to
{
MAX_DOWNLOAD_GENES
}
genes.'
return
Response
({
'message'
:
error_message
},
status
=
HTTP_500_INTERNAL_SERVER_ERROR
)
def
_check_too_many_genes
(
self
,
queryset
):
count
=
self
.
_get_queryset_count
(
queryset
)
return
count
>=
MAX_DOWNLOAD_GENES
def
_build_fasta_response
(
self
):
queryset
=
self
.
filter_queryset
(
self
.
get_queryset
())
count
=
self
.
_get_queryset_count
(
queryset
)
if
count
>=
MAX_FASTA_GENES
:
error_message
=
f
'Too many genes in the query, can obtain only up to
{
MAX_FASTA_GENES
}
fasta seq.'
return
Response
({
'message'
:
error_message
},
status
=
HTTP_500_INTERNAL_SERVER_ERROR
)
fasta_file
=
StringIO
()
for
gene
in
queryset
.
iterator
():
fasta_file
.
write
(
gene
.
fasta
)
# generate the file
response
=
HttpResponse
(
fasta_file
.
getvalue
(),
content_type
=
'text/fasta'
)
filename
=
'metagenedb_sequences.fasta'
response
[
'Content-Disposition'
]
=
'attachment; filename=%s'
%
filename
return
response
if
self
.
_check_too_many_genes
(
queryset
):
return
self
.
too_many_genes_error_response
with
StringIO
()
as
fasta_file
:
for
gene
in
queryset
.
iterator
():
fasta_file
.
write
(
gene
.
fasta
)
# generate the file
response
=
HttpResponse
(
fasta_file
.
getvalue
(),
content_type
=
'text/fasta'
)
filename
=
'metagenedb_sequences.fasta'
response
[
'Content-Disposition'
]
=
'attachment; filename=%s'
%
filename
return
response
def
_build_csv_response
(
self
):
queryset
=
self
.
filter_queryset
(
self
.
get_queryset
())
if
self
.
_check_too_many_genes
(
queryset
):
return
self
.
too_many_genes_error_response
with
StringIO
()
as
csv_file
:
# generate the file
response
=
HttpResponse
(
"Incoming"
,
content_type
=
'text/csv'
)
filename
=
'metagenedb.csv'
response
[
'Content-Disposition'
]
=
'attachment; filename=%s'
%
filename
return
response
@
swagger_auto_schema
(
tags
=
[
'Genes'
],
...
...
@@ -65,6 +79,8 @@ class GeneViewSet(BulkViewSet):
return
Response
(
error_message
,
status
=
HTTP_422_UNPROCESSABLE_ENTITY
)
if
query_params
.
get
(
'fasta'
,
False
)
is
True
:
return
self
.
_build_fasta_response
()
if
query_params
.
get
(
'csv'
,
False
)
is
True
:
return
self
.
_build_csv_response
()
return
super
().
list
(
request
,
*
args
,
**
kwargs
)
@
swagger_auto_schema
(
...
...
backend/metagenedb/api/catalog/views/test_gene.py
View file @
bd33850f
...
...
@@ -8,7 +8,6 @@ class TestGenes(TestCase):
def
test_get_genes_no_auth
(
self
):
"""
Unauthenticated users should be able to access genes
@TODO make unaccessible
"""
url
=
reverse
(
'api:catalog:v1:genes-list'
)
resp
=
self
.
client
.
get
(
url
)
...
...
backend/metagenedb/common/django_default/paginator.py
View file @
bd33850f
import
hashlib
import
inspect
from
django.conf
import
settings
from
django.core.cache
import
cache
from
django.core.cache.backends.base
import
DEFAULT_TIMEOUT
from
django.core.paginator
import
Paginator
from
django.utils.functional
import
cached_property
from
django.utils.inspect
import
method_has_no_args
CACHE_TTL
=
getattr
(
settings
,
'CACHE_TTL'
,
DEFAULT_TIMEOUT
)
from
metagenedb.common.utils.cache
import
queryset_count_cached
class
CachedCountPaginator
(
Paginator
):
...
...
@@ -17,16 +9,4 @@ class CachedCountPaginator(Paginator):
@
cached_property
def
count
(
self
):
"""Return the total number of objects, across all pages."""
# Create a hash of the SQL query to use as the Redis cache key
hash_object
=
hashlib
.
md5
(
str
(
self
.
object_list
.
query
).
encode
(
'utf-8'
))
redis_key
=
hash_object
.
hexdigest
()
if
redis_key
in
cache
:
return
cache
.
get
(
redis_key
)
else
:
c
=
getattr
(
self
.
object_list
,
'count'
,
None
)
if
callable
(
c
)
and
not
inspect
.
isbuiltin
(
c
)
and
method_has_no_args
(
c
):
count
=
c
()
else
:
count
=
len
(
self
.
object_list
)
cache
.
set
(
redis_key
,
count
,
timeout
=
CACHE_TTL
)
return
count
return
queryset_count_cached
(
self
.
object_list
)
backend/metagenedb/common/utils/cache/__init__.py
0 → 100644
View file @
bd33850f
from
.count
import
queryset_count_cached
# noqa
backend/metagenedb/common/utils/cache/count.py
0 → 100644
View file @
bd33850f
import
hashlib
import
inspect
from
django.conf
import
settings
from
django.core.cache
import
cache
from
django.core.cache.backends.base
import
DEFAULT_TIMEOUT
from
django.utils.inspect
import
method_has_no_args
CACHE_TTL
=
getattr
(
settings
,
'CACHE_TTL'
,
DEFAULT_TIMEOUT
)
def
queryset_count_cached
(
queryset
):
# Create a hash of the SQL query to use as the Redis cache key
hash_object
=
hashlib
.
md5
(
str
(
queryset
.
query
).
encode
(
'utf-8'
))
redis_key
=
hash_object
.
hexdigest
()
if
redis_key
in
cache
:
return
cache
.
get
(
redis_key
)
else
:
c
=
getattr
(
queryset
,
'count'
,
None
)
if
callable
(
c
)
and
not
inspect
.
isbuiltin
(
c
)
and
method_has_no_args
(
c
):
count
=
c
()
else
:
count
=
len
(
queryset
)
cache
.
set
(
redis_key
,
count
,
timeout
=
CACHE_TTL
)
return
count
backend/metagenedb/settings/metagenedb.py
View file @
bd33850f
...
...
@@ -9,4 +9,4 @@ env = environ.Env()
environ
.
Env
.
read_env
(
root
(
'.env'
))
# reading .env file
# Maximum number of FASTA genes able to retrieve through API
MAX_
FASTA
_GENES
=
env
.
str
(
'MAX_
FASTA
_GENES'
,
default
=
100000
)
MAX_
DOWNLOAD
_GENES
=
env
.
str
(
'MAX_
DOWNLOAD
_GENES'
,
default
=
100000
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment