Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
8e05be86
Commit
8e05be86
authored
Dec 04, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Merge branch '67-statistics-on-genes' into 'dev'
Add more statistical graph about gene catalog Closes #67 See merge request
!23
parents
9650a34f
584a5f59
Pipeline
#19270
passed with stages
in 2 minutes and 23 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/api/catalog/qparams_validators/gene.py
View file @
8e05be86
from
marshmallow
import
Schema
,
fields
from
marshmallow.validate
import
OneOf
from
metagenedb.common.django_default.qparams_validators
import
PaginatedQueryParams
TAXA_CHOICES
=
[
'superkingdom'
,
'kingdom'
,
'phylum'
,
'class'
,
'order'
,
'family'
,
'genus'
,
'species'
]
class
GeneLengthQueryParams
(
Schema
):
window_size
=
fields
.
Integer
()
...
...
@@ -11,3 +16,7 @@ class GeneLengthQueryParams(Schema):
class
GeneQueryParams
(
PaginatedQueryParams
):
no_taxonomy
=
fields
.
Boolean
()
no_functions
=
fields
.
Boolean
()
class
TaxCountQueryParams
(
Schema
):
level
=
fields
.
String
(
validate
=
OneOf
(
choices
=
TAXA_CHOICES
))
backend/metagenedb/api/catalog/views/gene.py
View file @
8e05be86
from
django_pandas.io
import
read_frame
from
collections
import
defaultdict
from
django.db.models
import
Max
from
drf_yasg
import
openapi
from
drf_yasg.utils
import
swagger_auto_schema
from
marshmallow.exceptions
import
ValidationError
...
...
@@ -8,9 +10,8 @@ from rest_framework.status import HTTP_204_NO_CONTENT, HTTP_422_UNPROCESSABLE_EN
from
metagenedb.apps.catalog.models
import
Gene
from
metagenedb.api.catalog.filters
import
GeneFilter
from
metagenedb.api.catalog.qparams_validators.gene
import
GeneLengthQueryParams
,
GeneQueryParams
from
metagenedb.api.catalog.qparams_validators.gene
import
GeneLengthQueryParams
,
GeneQueryParams
,
TaxCountQueryParams
from
metagenedb.apps.catalog.serializers
import
GeneSerializer
from
metagenedb.common.utils.df_operations
import
get_mask
from
.bulk_viewset
import
BulkViewSet
...
...
@@ -56,18 +57,21 @@ class GeneViewSet(BulkViewSet):
DEFAULT_WINDOW_SIZE
=
1000
DEFAULT_STOP_AT
=
10000
DEFAULT_LEVEL
=
'phylum'
def
get_permissions
(
self
):
return
super
(
self
.
__class__
,
self
).
get_permissions
()
def
_count_windows
(
self
,
df
,
window_size
=
DEFAULT_WINDOW_SIZE
,
window_col
=
GENE_LENGTH_COL
,
stop_at
=
DEFAULT_STOP_AT
):
def
_count_windows
(
self
,
queryset
,
window_size
=
DEFAULT_WINDOW_SIZE
,
window_col
=
GENE_LENGTH_COL
,
stop_at
=
DEFAULT_STOP_AT
):
"""
Count how many
line of the df belong to each windows defined by the window_size for the window_col
:param
df
:
Count how many
entries by performing one query per range
:param
queryset
:
:param window_col: column concerned by the window
:param window_size: size of the window
:return: {'data': COUNTS_BY_WINDOW, 'labels': START-END}
"""
length_max
=
df
[
window_col
].
max
(
)
length_max
=
queryset
.
aggregate
(
Max
(
'length'
)).
get
(
'length__max'
,
0
)
stop_at
=
length_max
if
length_max
<
stop_at
else
stop_at
all_ranges
=
[[
i
,
i
+
window_size
]
for
i
in
range
(
0
,
stop_at
+
1
,
window_size
)]
all_ranges
[
-
1
][
1
]
=
length_max
+
1
# last should contain all above the stop_at
...
...
@@ -75,7 +79,7 @@ class GeneViewSet(BulkViewSet):
labels
=
[]
for
rg
in
all_ranges
:
labels
.
append
(
f
"
{
rg
[
0
]
/
1000
}
k-
{
rg
[
1
]
/
1000
}
k"
)
data
.
append
(
df
[
get_mask
(
df
,
rg
,
window_col
)].
count
()[
window_col
]
)
data
.
append
(
queryset
.
filter
(
length__gte
=
rg
[
0
],
length__lt
=
rg
[
1
]).
count
()
)
# Change labels
labels
[
0
]
=
f
"<
{
labels
[
0
].
split
(
'-'
)[
1
]
}
"
labels
[
-
1
]
=
f
">
{
labels
[
-
1
].
split
(
'-'
)[
0
]
}
"
...
...
@@ -105,12 +109,51 @@ class GeneViewSet(BulkViewSet):
window_size
=
query_params
.
get
(
'window_size'
,
self
.
DEFAULT_WINDOW_SIZE
)
stop_at
=
query_params
.
get
(
'stop_at'
,
self
.
DEFAULT_STOP_AT
)
df
=
read_frame
(
Gene
.
objects
.
all
(),
fieldnames
=
[
self
.
GENE_LENGTH_COL
])
if
df
.
empty
:
queryset
=
Gene
.
objects
.
all
()
if
not
queryset
.
exists
():
return
Response
(
{},
status
=
HTTP_204_NO_CONTENT
)
return
Response
(
{
'results'
:
self
.
_count_windows
(
queryset
,
window_size
=
window_size
,
stop_at
=
stop_at
)}
)
def
_taxonomy_counts
(
self
,
queryset
,
level
=
DEFAULT_LEVEL
):
filter_no_annotation
=
{
f
"taxonomy__
{
level
}
__isnull"
:
True
}
filter_annotation
=
{
f
"taxonomy__
{
level
}
__isnull"
:
False
}
value_to_retrieve
=
f
'taxonomy__
{
level
}
__name'
taxonomy_counts
=
{}
taxonomy_counts
[
'counts'
]
=
defaultdict
(
lambda
:
0
)
taxonomy_counts
[
'counts'
][
'No annotation'
]
=
queryset
.
filter
(
**
filter_no_annotation
).
values
().
count
()
if
taxonomy_counts
[
'counts'
][
'No annotation'
]
==
0
:
del
taxonomy_counts
[
'counts'
][
'No annotation'
]
for
value
in
queryset
.
filter
(
**
filter_annotation
).
values
(
value_to_retrieve
):
tax_name
=
value
[
value_to_retrieve
]
taxonomy_counts
[
'counts'
][
tax_name
]
+=
1
return
taxonomy_counts
@
action
(
methods
=
[
'get'
],
detail
=
False
)
def
taxonomy_counts
(
self
,
request
):
try
:
query_params
=
TaxCountQueryParams
().
load
(
request
.
query_params
)
except
ValidationError
as
validation_error
:
error_message
=
validation_error
.
normalized_messages
()
error_message
.
update
({
'allowed_query_params'
:
', '
.
join
(
GeneLengthQueryParams
().
declared_fields
.
keys
())
})
return
Response
(
error_message
,
status
=
HTTP_422_UNPROCESSABLE_ENTITY
)
level
=
query_params
.
get
(
'level'
,
self
.
DEFAULT_LEVEL
)
level
=
'class_rank'
if
level
==
'class'
else
level
# deal with class exception @TODO fix cleaner way
queryset
=
Gene
.
objects
.
all
().
select_related
(
f
'taxonomy__
{
level
}
'
)
if
not
queryset
.
exists
():
return
Response
(
{
'results'
:
{}
},
{},
status
=
HTTP_204_NO_CONTENT
)
counts
=
self
.
_taxonomy_counts
(
queryset
,
level
=
level
)
counts
[
'level'
]
=
query_params
.
get
(
'level'
,
self
.
DEFAULT_LEVEL
)
return
Response
(
{
'results'
:
self
.
_count_windows
(
df
,
window_size
=
window_size
,
stop_at
=
stop_at
)
}
{
'results'
:
counts
}
)
backend/metagenedb/api/catalog/views/test_gene.py
View file @
8e05be86
import
pandas
as
pd
from
django.contrib.auth.models
import
User
from
django.test
import
TestCase
from
django.urls
import
reverse
...
...
@@ -6,8 +5,7 @@ from rest_framework import status
from
rest_framework.test
import
APITestCase
from
rest_framework_jwt.settings
import
api_settings
from
metagenedb.api.catalog.views.gene
import
GeneViewSet
from
metagenedb.apps.catalog.factory
import
GeneFactory
from
metagenedb.apps.catalog.factory
import
GeneFactory
,
TaxonomyFactory
from
metagenedb.common.utils.mocks.metagenedb
import
MetageneDBCatalogGeneAPIMock
...
...
@@ -41,43 +39,17 @@ class TestGenes(TestCase):
self
.
assertEqual
(
resp
.
status_code
,
status
.
HTTP_200_OK
)
class
TestCountWindows
(
TestCase
):
def
setUp
(
self
):
self
.
window_col
=
"length"
self
.
df
=
pd
.
DataFrame
(
[
22
,
29
,
35
],
columns
=
[
self
.
window_col
]
)
def
test_simple_count_window10
(
self
):
expected_dict
=
{
'labels'
:
[
'<0.01k'
,
'0.01k-0.02k'
,
'0.02k-0.03k'
,
'>0.03k'
],
'counts'
:
[
0
,
0
,
2
,
1
]
}
geneviewset
=
GeneViewSet
()
test_dict
=
geneviewset
.
_count_windows
(
self
.
df
,
10
,
window_col
=
self
.
window_col
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
def
test_simple_count_window10_stop20
(
self
):
expected_dict
=
{
'labels'
:
[
'<0.01k'
,
'0.01k-0.02k'
,
'>0.02k'
],
'counts'
:
[
0
,
0
,
3
]
}
geneviewset
=
GeneViewSet
()
test_dict
=
geneviewset
.
_count_windows
(
self
.
df
,
window_size
=
10
,
window_col
=
self
.
window_col
,
stop_at
=
20
)
self
.
assertDictEqual
(
test_dict
,
expected_dict
)
class
TestCountWindowsAPI
(
APITestCase
):
def
setUp
(
self
):
self
.
gene_api
=
MetageneDBCatalogGeneAPIMock
(
self
.
client
)
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
def
test_gene_length_no_content
(
self
):
self
.
assertFalse
(
self
.
gene_api
.
get_gene_length
())
def
test_gene_length_api
(
self
):
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
expected_dict
=
{
'results'
:
{
'counts'
:
[
0
,
0
,
3
,
3
],
...
...
@@ -87,6 +59,8 @@ class TestCountWindowsAPI(APITestCase):
self
.
assertDictEqual
(
self
.
gene_api
.
get_gene_length
(),
expected_dict
)
def
test_gene_length_api_stop_at_2000
(
self
):
for
i
in
range
(
2000
,
4000
,
350
):
GeneFactory
.
create
(
length
=
i
)
expected_dict
=
{
'results'
:
{
'counts'
:
[
0
,
0
,
6
],
...
...
@@ -97,3 +71,47 @@ class TestCountWindowsAPI(APITestCase):
'stop_at'
:
2000
}
self
.
assertDictEqual
(
self
.
gene_api
.
get_gene_length
(
params
=
query_params
),
expected_dict
)
class
TestTaxonomyCountsAPI
(
APITestCase
):
def
setUp
(
self
):
self
.
gene_api
=
MetageneDBCatalogGeneAPIMock
(
self
.
client
)
def
test_taxonomy_counts_no_content
(
self
):
self
.
assertFalse
(
self
.
gene_api
.
get_tax_counts
())
def
test_taxonomy_counts_api
(
self
):
tax_name
=
"TaxTest"
taxonomy
=
TaxonomyFactory
(
rank
=
'phylum'
,
name
=
tax_name
)
taxonomy
.
phylum
=
taxonomy
# link taxonomy to itself as phylum
taxonomy
.
save
()
gene
=
GeneFactory
.
create
(
taxonomy
=
taxonomy
)
# noqa
expected_dict
=
{
'results'
:
{
'level'
:
'phylum'
,
'counts'
:
{
tax_name
:
1
}
}
}
self
.
assertDictEqual
(
self
.
gene_api
.
get_tax_counts
(),
expected_dict
)
def
test_taxonomy_counts_api_class_level
(
self
):
tax_name
=
"TaxTest"
taxonomy
=
TaxonomyFactory
(
rank
=
'class_rank'
,
name
=
tax_name
)
taxonomy
.
class_rank
=
taxonomy
# link taxonomy to itself as phylum
taxonomy
.
save
()
gene
=
GeneFactory
.
create
(
taxonomy
=
taxonomy
)
# noqa
expected_dict
=
{
'results'
:
{
'level'
:
'class'
,
'counts'
:
{
tax_name
:
1
}
}
}
query_params
=
{
'level'
:
'class'
}
self
.
assertDictEqual
(
self
.
gene_api
.
get_tax_counts
(
params
=
query_params
),
expected_dict
)
backend/metagenedb/apps/catalog/factory/gene.py
View file @
8e05be86
from
factory
import
DjangoModelFactory
,
fuzzy
from
factory
import
DjangoModelFactory
,
SubFactory
,
fuzzy
from
faker
import
Factory
from
metagenedb.apps.catalog
import
models
from
.fuzzy_base
import
FuzzyLowerText
from
.taxonomy
import
TaxonomyFactory
faker
=
Factory
.
create
()
...
...
@@ -17,3 +18,4 @@ class GeneFactory(DjangoModelFactory):
gene_id
=
FuzzyLowerText
(
prefix
=
'gene-'
,
length
=
15
)
gene_name
=
fuzzy
.
FuzzyText
(
prefix
=
'name-'
,
length
=
15
)
length
=
fuzzy
.
FuzzyInteger
(
200
,
10000
)
taxonomy
=
SubFactory
(
TaxonomyFactory
)
backend/metagenedb/common/utils/mocks/metagenedb.py
View file @
8e05be86
...
...
@@ -52,6 +52,17 @@ class MetageneDBCatalogGeneAPIMock(MetageneDBAPIMock):
response
=
self
.
client
.
get
(
reverse
(
reverse_path
),
params
)
if
response
.
status_code
in
self
.
BAD_REQUESTS
:
raise
HTTPError
if
response
.
status_code
==
204
:
# no content
return
{}
return
response
.
json
()
def
get_tax_counts
(
self
,
params
=
None
):
reverse_path
=
f
"
{
self
.
reverse_path
}
-taxonomy-counts"
response
=
self
.
client
.
get
(
reverse
(
reverse_path
),
params
)
if
response
.
status_code
in
self
.
BAD_REQUESTS
:
raise
HTTPError
if
response
.
status_code
==
204
:
# no content
return
{}
return
response
.
json
()
...
...
frontend/src/components/Doughnut.vue
0 → 100644
View file @
8e05be86
<
template
>
<v-card
class=
"pa-2"
>
<div
class=
"card-body"
>
<div
class=
"text-xs-center"
v-if=
"noGraph"
>
<v-progress-circular
indeterminate
color=
"secondary"
></v-progress-circular>
</div>
<div>
<canvas
:id=
"chartId"
></canvas>
</div>
<v-layout
justify-space-around
row
>
<v-flex
sm3
>
<v-switch
v-model=
"hideLegend"
label=
"Hide legend"
>
</v-switch>
</v-flex>
<v-flex
sm8
>
<v-select
v-model=
"hideLabels"
:items=
"this.labels"
attach
chips
:label=
"hideLabelsLabel"
multiple
clearable
></v-select>
</v-flex>
</v-layout>
</div>
</v-card>
</
template
>
<
script
>
import
Chart
from
'
chart.js
'
;
export
default
{
props
:
{
doughnutData
:
{
type
:
Object
,
required
:
true
,
},
chartId
:
String
,
},
data
()
{
return
{
myChart
:
{},
options
:
{},
colors
:
[],
hideLegend
:
false
,
labels
:
[],
hideLabels
:
[],
noGraph
:
true
,
}
},
computed
:
{
hideLabelsLabel
()
{
if
(
Object
.
entries
(
this
.
doughnutData
).
length
==
0
)
{
return
"
Hide fields
"
;
}
else
{
return
"
Hide
"
+
this
.
doughnutData
.
level
;
}
},
displayLegend
()
{
return
!
this
.
hideLegend
;
}
},
methods
:
{
createChart
()
{
const
ctx
=
document
.
getElementById
(
this
.
chartId
);
this
.
myChart
=
new
Chart
(
ctx
,
{
type
:
'
doughnut
'
,
});
},
updateChart
()
{
this
.
labels
=
Object
.
keys
(
this
.
doughnutData
.
data
);
this
.
generateColorList
();
this
.
updateChartOptions
();
this
.
updateChartData
();
},
generateColorList
()
{
// # TODO get out of this and refactor -> return list of colors
this
.
color
=
[];
for
(
var
i
=
0
;
i
<
Object
.
keys
(
this
.
doughnutData
.
data
).
length
;
i
++
)
{
this
.
colors
.
push
(
this
.
generateColor
())
}
},
generateColor
()
{
// Same as above
var
letters
=
'
0123456789ABCDEF
'
;
var
color
=
'
#
'
;
for
(
var
i
=
0
;
i
<
6
;
i
++
)
{
color
+=
letters
[
Math
.
floor
(
Math
.
random
()
*
16
)];
}
return
color
;
},
updateChartOptions
()
{
this
.
options
=
{
legend
:
{
display
:
this
.
displayLegend
,
}
};
this
.
myChart
.
options
=
this
.
options
;
this
.
myChart
.
update
();
},
updateChartData
()
{
const
dataDict
=
Object
.
assign
({},
this
.
doughnutData
.
data
);
for
(
let
i
=
0
;
i
<
this
.
hideLabels
.
length
;
i
++
)
{
delete
dataDict
[
this
.
hideLabels
[
i
]];
};
this
.
myChart
.
data
=
{
labels
:
Object
.
keys
(
dataDict
),
datasets
:
[
{
data
:
Object
.
values
(
dataDict
),
backgroundColor
:
this
.
colors
,
},
],
};
this
.
myChart
.
update
();
},
},
watch
:
{
doughnutData
(
val
)
{
if
(
this
.
noGraph
)
{
this
.
noGraph
=
false
;
this
.
createChart
();
}
this
.
updateChart
();
},
hideLegend
(
val
)
{
this
.
updateChartOptions
();
},
hideLabels
()
{
this
.
updateChartData
();
}
},
};
</
script
>
frontend/src/components/Histogram.vue
View file @
8e05be86
<
template
>
<v-flex
xs12
md6
xl4
>
<v-toolbar
:class=
"histoData.class"
dark
v-if=
"histoData.data"
>
<v-icon
class=
"white--text"
>
{{
histoData
.
icon
}}
</v-icon>
<v-toolbar-title>
{{
histoData
.
title
}}
</v-toolbar-title>
</v-toolbar>
<v-card
class=
"pa-2"
>
<div
class=
"card-body"
>
<div
v-if=
"histoData.data"
>
<canvas
:id=
"histoData.chart_id"
></canvas>
</div>
<div
class=
"text-xs-center"
v-else
>
<v-progress-circular
indeterminate
color=
"secondary"
></v-progress-circular>
</div>
<v-card
class=
"pa-2"
>
<div
class=
"card-body"
>
<div
class=
"text-xs-center"
v-if=
"noGraph"
>
<v-progress-circular
indeterminate
color=
"secondary"
></v-progress-circular>
</div>
</v-card>
</v-flex>
<div>
<canvas
:id=
"chartId"
></canvas>
</div>
</div>
</v-card>
</
template
>
<
script
>
...
...
@@ -33,14 +23,27 @@ export default {
type
:
Object
,
required
:
true
,
},
chartId
:
String
,
},
updated
()
{
this
.
createChart
(
this
.
histoData
.
chart_id
);
data
()
{
return
{
myChart
:
{},
options
:
{},
noGraph
:
true
,
}
},
methods
:
{
createChart
(
chartId
)
{
const
ctx
=
document
.
getElementById
(
chartId
);
const
histoData
=
{
createChart
()
{
const
ctx
=
document
.
getElementById
(
this
.
chartId
);
this
.
myChart
=
new
Chart
(
ctx
,
{
type
:
'
bar
'
,
});
},
updateChart
()
{
this
.
updateChartData
();
},
updateChartData
()
{
this
.
myChart
.
data
=
{
labels
:
this
.
histoData
.
labels
,
datasets
:
[
{
...
...
@@ -51,11 +54,16 @@ export default {
},
],
};
// eslint-disable-next-line
const
myChart
=
new
Chart
(
ctx
,
{
type
:
'
bar
'
,
data
:
histoData
,
});
this
.
myChart
.
update
();
},
},
watch
:
{
histoData
(
val
)
{
if
(
this
.
noGraph
)
{
this
.
noGraph
=
false
;
this
.
createChart
();
}
this
.
updateChart
();
},
},
};
...
...
frontend/src/views/Stats.vue
View file @
8e05be86
...
...
@@ -24,7 +24,56 @@
<countcard
:count=
"geneCountFull"
></countcard>
</v-layout>
<v-layout
row
wrap
>
<histogram
:histoData=
"geneLengthData"
></histogram>
<v-flex
xs12
md6
xl4
>
<v-toolbar
class=
"secondary"
dark
>
<v-icon
class=
"white--text"
>
bar_chart
</v-icon>
<v-toolbar-title>
Gene length distribution
</v-toolbar-title>
<v-spacer></v-spacer>
<v-flex
xs4
md2
>
<v-select
background-color=
"primary"
:items=
"this.windowSizeChoice"
label=
"Window Length"
color=
"secondary"
v-model=
"geneLengthWindowSize"
></v-select>
</v-flex>
<v-flex
xs4
md2
>
<v-select
background-color=
"primary"
:items=
"this.stopAtChoice"
label=
"Stop at"
color=
"secondary"
v-model=
"stopAt"
></v-select>
</v-flex>
</v-toolbar>
<histogram
:histoData=
"geneLengthData"
chartId=
"histo_gene_length"
></histogram>
</v-flex>
<!-- Doughnut -->
<v-flex
xs12
md6
xl4
>
<v-toolbar
class=
"secondary"
dark
>
<v-icon
class=
"white--text"
>
account_tree
</v-icon>
<v-toolbar-title>
Taxonomical annotation
</v-toolbar-title>
<v-spacer></v-spacer>
<v-flex
xs4
md2
>
<v-select
background-color=
"primary"
:items=
"selectLevel"
color=
"secondary"
label=
"Level"
v-model=
"taxLevel"
></v-select>
</v-flex>
</v-toolbar>
<doughnut
:doughnutData=
"taxoCounts"
chartId=
"taxo_annotation"
></doughnut>
</v-flex>
</v-layout>
</v-container>
</v-card>
...
...
@@ -37,6 +86,8 @@
import
axios
from
'
axios
'
;
import
Histogram
from
'
@/components/Histogram.vue
'
;
import
CountCard
from
'
@/components/CountCard.vue
'
;
import
Doughnut
from
'
@/components/Doughnut.vue
'
;
export
default
{
name
:
'
home
'
,
...
...
@@ -47,23 +98,45 @@ export default {
geneCountFunctions
:
{},
geneCountTaxo
:
{},
geneCountFull
:
{},
geneLengthWindowSize
:
4
00
,
geneLengthWindowSize
:
2
00
,
stopAt
:
5000
,
taxoCounts
:
{},
taxLevel
:
'
phylum
'
,
};
},
computed
:
{
stopAtChoice
()
{
let
listStopAt
=
[];
for
(
let
i
=
4000
;
i
<=
10000
;
i
+=
1000
)
{