Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
bd68a8c4
Commit
bd68a8c4
authored
Dec 02, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Improve perf for counting windows
parent
87cb9d53
Pipeline
#19172
passed with stages
in 2 minutes and 20 seconds
Changes
3
Pipelines
1
Show whitespace changes
Inline
Side-by-side
backend/metagenedb/api/catalog/views/gene.py
View file @
bd68a8c4
from
django
_pandas.io
import
read_frame
from
django
.db.models
import
Max
from
drf_yasg
import
openapi
from
drf_yasg.utils
import
swagger_auto_schema
from
marshmallow.exceptions
import
ValidationError
...
...
@@ -10,7 +10,6 @@ from metagenedb.apps.catalog.models import Gene
from
metagenedb.api.catalog.filters
import
GeneFilter
from
metagenedb.api.catalog.qparams_validators.gene
import
GeneLengthQueryParams
,
GeneQueryParams
from
metagenedb.apps.catalog.serializers
import
GeneSerializer
from
metagenedb.common.utils.df_operations
import
get_mask
from
.bulk_viewset
import
BulkViewSet
...
...
@@ -59,15 +58,16 @@ class GeneViewSet(BulkViewSet):
def
get_permissions
(
self
):
return
super
(
self
.
__class__
,
self
).
get_permissions
()
def
_count_windows
(
self
,
df
,
window_size
=
DEFAULT_WINDOW_SIZE
,
window_col
=
GENE_LENGTH_COL
,
stop_at
=
DEFAULT_STOP_AT
):
def
_count_windows
(
self
,
queryset
,
window_size
=
DEFAULT_WINDOW_SIZE
,
window_col
=
GENE_LENGTH_COL
,
stop_at
=
DEFAULT_STOP_AT
):
"""
Count how many
line of the df belong to each windows defined by the window_size for the window_col
:param
df
:
Count how many
entries by performing one query per range
:param
queryset
:
:param window_col: column concerned by the window
:param window_size: size of the window
:return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}
"""
length_max
=
df
[
window_col
].
max
(
)
length_max
=
queryset
.
aggregate
(
Max
(
'length'
)).
get
(
'length__max'
,
0
)
stop_at
=
length_max
if
length_max
<
stop_at
else
stop_at
all_ranges
=
[[
i
,
i
+
window_size
]
for
i
in
range
(
0
,
stop_at
+
1
,
window_size
)]
all_ranges
[
-
1
][
1
]
=
length_max
+
1
# last should contain all above the stop_at
...
...
@@ -75,7 +75,7 @@ class GeneViewSet(BulkViewSet):
labels
=
[]
for
rg
in
all_ranges
:
labels
.
append
(
f
"
{
rg
[
0
]
/
1000
}
k-
{
rg
[
1
]
/
1000
}
k"
)
data
.
append
(
df
[
get_mask
(
df
,
rg
,
window_col
)].
count
()[
window_col
]
)
data
.
append
(
queryset
.
filter
(
length__gte
=
rg
[
0
],
length__lt
=
rg
[
1
]).
count
()
)
# Change labels
labels
[
0
]
=
f
"<
{
labels
[
0
].
split
(
'-'
)[
1
]
}
"
labels
[
-
1
]
=
f
">
{
labels
[
-
1
].
split
(
'-'
)[
0
]
}
"
...
...
@@ -105,12 +105,13 @@ class GeneViewSet(BulkViewSet):
window_size
=
query_params
.
get
(
'window_size'
,
self
.
DEFAULT_WINDOW_SIZE
)
stop_at
=
query_params
.
get
(
'stop_at'
,
self
.
DEFAULT_STOP_AT
)
df
=
read_frame
(
Gene
.
objects
.
all
(),
fieldnames
=
[
self
.
GENE_LENGTH_COL
])
if
df
.
empty
:
# df = read_frame(Gene.objects.all(), fieldnames=[self.GENE_LENGTH_COL])
queryset
=
Gene
.
objects
.
all
()
if
not
queryset
.
exists
():
return
Response
(
{
'results'
:
{}
},
{},
status
=
HTTP_204_NO_CONTENT
)
return
Response
(
{
'results'
:
self
.
_count_windows
(
df
,
window_size
=
window_size
,
stop_at
=
stop_at
)}
{
'results'
:
self
.
_count_windows
(
queryset
,
window_size
=
window_size
,
stop_at
=
stop_at
)}
)
backend/metagenedb/api/catalog/views/test_gene.py
View file @
bd68a8c4
import
pandas
as
pd
from
django.contrib.auth.models
import
User
from
django.test
import
TestCase
from
django.urls
import
reverse
...
...
@@ -6,7 +5,6 @@ from rest_framework import status
from
rest_framework.test
import
APITestCase
from
rest_framework_jwt.settings
import
api_settings
from
metagenedb.api.catalog.views.gene
import
GeneViewSet
from
metagenedb.apps.catalog.factory
import
GeneFactory
from
metagenedb.common.utils.mocks.metagenedb
import
MetageneDBCatalogGeneAPIMock
...
...
@@ -41,43 +39,17 @@ class TestGenes(TestCase):
self
.
assertEqual
(
resp
.
status_code
,
status
.
HTTP_200_OK
)
class
TestCountWindows
(
TestCase
):
def
setUp
(
self
):
self
.
window_col
=
"length"
self
.
df
=
pd
.
DataFrame
(
[
22
,
29
,
35
],
columns
=
[
self
.
window_col
]
)
def
test_simple_count_window10
(
self
):
expected_dict
=
{
'labels'
:
[
'<0.01k'
,
'0.01k-0.02k'
,
'0.02k-0.03k'
,
'>0.03k'
],
'counts'
:
[
0
,
0
,
2
,
1
]
}
geneviewset
=
GeneViewSet
()
test_dict
=
geneviewset
.
_count_windows
(
self
.
df
,
10
,
window_col
=
self
.
window_col
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_simple_count_window10_stop20
(
self
):
expected_dict
=
{
'labels'
:
[
'<0.01k'
,
'0.01k-0.02k'
,
'>0.02k'
],
'counts'
:
[
0
,
0
,
3
]
}
geneviewset
=
GeneViewSet
()
test_dict
=
geneviewset
.
_count_windows
(
self
.
df
,
window_size
=
10
,
window_col
=
self
.
window_col
,
stop_at
=
20
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
class
TestCountWindowsAPI
(
APITestCase
):
def
setUp
(
self
):
self
.
gene_api
=
MetageneDBCatalogGeneAPIMock
(
self
.
client
)
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
def
test_gene_length_no_content
(
self
):
self
.
assertFalse
(
self
.
gene_api
.
get_gene_length
())
def
test_gene_length_api
(
self
):
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
expected_dict
=
{
'results'
:
{
'counts'
:
[
0
,
0
,
3
,
3
],
...
...
@@ -87,6 +59,8 @@ class TestCountWindowsAPI(APITestCase):
self
.
assertDictEqual
(
self
.
gene_api
.
get_gene_length
(),
expected_dict
)
def
test_gene_length_api_stop_at_2000
(
self
):
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
expected_dict
=
{
'results'
:
{
'counts'
:
[
0
,
0
,
6
],
...
...
backend/metagenedb/common/utils/mocks/metagenedb.py
View file @
bd68a8c4
...
...
@@ -52,6 +52,8 @@ class MetageneDBCatalogGeneAPIMock(MetageneDBAPIMock):
response
=
self
.
client
.
get
(
reverse
(
reverse_path
),
params
)
if
response
.
status_code
in
self
.
BAD_REQUESTS
:
raise
HTTPError
if
response
.
status_code
==
204
:
# no content
return
{}
return
response
.
json
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment