Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
d6ad306e
Commit
d6ad306e
authored
May 29, 2020
by
Kenzo-Hugo Hillion
♻
Browse files
add tool to compute all taxonomy entries present in at least one gene of the catalog
parent
357426f3
Pipeline
#31343
passed with stages
in 3 minutes and 18 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/management/commands/compute_stats.py
View file @
d6ad306e
...
...
@@ -140,9 +140,34 @@ class ComputeTaxonomyRepartition(ComputeStatistics):
self
.
_save_to_db
(
payload
)
class ComputeTaxonomyPresence(ComputeStatistics):
    """Store, per gene source and taxonomic level, the result of ``present_taxonomy``."""

    # Taxonomic levels for which a statistics entry is computed.
    ALL_LEVEL = [
        'kingdom', 'superkingdom', 'phylum', 'class',
        'order', 'family', 'genus', 'species',
    ]

    def all(self):
        """Compute and save ``present_taxonomy`` for every source/level pair.

        Iterates over ``self.GENE_SOURCES``; the special source ``'all'``
        applies no filter, any other value filters genes on ``source``.
        Each result is saved through ``self._save_to_db`` under a slugified id.
        """
        for source_name in self.GENE_SOURCES:
            source_filters = {} if source_name == 'all' else {'source': source_name}
            statistics = GeneStatistics(filters=source_filters)
            for rank in self.ALL_LEVEL:
                identifier = slugify(
                    f"GeneStatistics({source_name}).present_taxonomy({rank})"
                )
                logger.info(
                    "Call GeneStatistics.present_taxonomy(%s) and saving under id <%s>",
                    rank, identifier,
                )
                present = statistics.present_taxonomy(level=rank)
                self._save_to_db({'stats_id': identifier, 'body': present})
class
Command
(
BaseCommand
):
help
=
"Compute gene catalog statistics."
STEP_CHOICES
=
[
'clean'
,
'counts'
,
'gene-length'
,
'taxonomy'
]
STEP_CHOICES
=
[
'clean'
,
'counts'
,
'gene-length'
,
'taxonomy
_repartition'
,
'taxonomy_presence
'
]
def
add_arguments
(
self
,
parser
):
parser
.
add_argument
(
'--only'
,
help
=
f
'Run only one step (choices:
{
self
.
STEP_CHOICES
}
).'
)
...
...
@@ -168,5 +193,7 @@ class Command(BaseCommand):
ComputeCounts
().
all
()
if
only_step
is
None
or
only_step
==
"gene-length"
:
ComputeGeneLength
().
all
()
if
only_step
is
None
or
only_step
==
"taxonomy"
:
if
only_step
is
None
or
only_step
==
"taxonomy
_repartition
"
:
ComputeTaxonomyRepartition
().
all
()
if
only_step
is
None
or
only_step
==
"taxonomy_presence"
:
ComputeTaxonomyPresence
().
all
()
backend/metagenedb/apps/catalog/operations/statistics.py
View file @
d6ad306e
...
...
@@ -3,7 +3,7 @@ from collections import defaultdict
from
django.db.models
import
Max
from
metagenedb.apps.catalog.models
import
Gene
from
metagenedb.apps.catalog.models
import
Gene
,
Taxonomy
from
metagenedb.common.utils.color_generator
import
generate_color_code
from
metagenedb.common.utils.dict
import
extract_labels_and_values
...
...
@@ -71,6 +71,21 @@ class GeneStatistics(Statistics):
'counts'
:
results
[
1
],
}
def present_taxonomy(self, level="phylum"):
    """List the taxonomy entries referenced at ``level`` by the genes of the queryset.

    Args:
        level: key of the taxonomy hierarchy to look at (default ``"phylum"``).

    Returns:
        dict with two parallel lists, ``'tax_ids'`` and ``'tax_names'``, built
        from the ``Taxonomy`` rows whose ``tax_id`` was found for that level.
        Both lists are empty when no gene has a value at that level.
    """
    genes = self.get_queryset().select_related(f'taxonomy__{level}')
    # Keep only genes whose taxonomy hierarchy has an entry for this level.
    has_level = {f"taxonomy__hierarchy__{level}__isnull": False}
    tax_id_lookup = f'taxonomy__hierarchy__{level}__tax_id'
    rows = genes.filter(**has_level).values_list(tax_id_lookup)
    # values_list yields 1-tuples; collapse duplicates shared by several genes.
    unique_tax_ids = list({row[0] for row in rows})
    tax_ids = []
    tax_names = []
    for entry in Taxonomy.objects.filter(tax_id__in=unique_tax_ids):
        tax_ids.append(entry.tax_id)
        tax_names.append(entry.name)
    return {'tax_ids': tax_ids, 'tax_names': tax_names}
class
GeneLengthDistribution
(
Statistics
):
model
=
Gene
...
...
backend/metagenedb/apps/catalog/operations/test_statistics.py
View file @
d6ad306e
from
rest_framework.test
import
APITestCase
from
metagenedb.common.utils.color_generator
import
generate_color_code
from
metagenedb.apps.catalog.factory
import
(
GeneFactory
,
GeneWithEggNOGFactory
,
GeneWithKeggFactory
,
TaxonomyFactory
)
...
...
@@ -13,11 +14,22 @@ class BaseTestGeneStatistics(APITestCase):
self
.
gene_stats
=
GeneStatistics
()
class
TestTaxonomy
Repartition
(
BaseTestGeneStatistics
):
class BaseTestTaxonomy(BaseTestGeneStatistics):
    """Shared fixture: a root -> phylum -> class taxonomy chain for taxonomy tests."""

    @classmethod
    def setUpTestData(cls):
        """Create the three linked taxonomy entries used by subclasses."""

        def linked(node, parent):
            # Attach the node to its parent, persist it, then build its hierarchy.
            node.parent = parent
            node.save()
            node.build_hierarchy()
            return node

        cls.parent_root = TaxonomyFactory(rank="root")
        cls.phylum = linked(TaxonomyFactory(rank='phylum'), cls.parent_root)
        cls.class_tax = linked(TaxonomyFactory(rank='class'), cls.phylum)
class
TestTaxonomyRepartition
(
BaseTestTaxonomy
):
def
test_taxonomy_counts_no_content
(
self
):
expected_dict
=
{
...
...
@@ -27,35 +39,69 @@ class TestTaxonomyRepartition(BaseTestGeneStatistics):
}
self
.
assertDictEqual
(
self
.
gene_stats
.
taxonomy_repartition
(),
expected_dict
)
def test_taxonomy_counts_no_annotation(self):
    """A gene created with factory defaults is expected under 'No annotation'."""
    # Only the database row matters; the instance itself is not needed.
    GeneFactory.create()
    expected_dict = {
        'labels': ['No annotation'],
        'counts': [1],
        'colors': [generate_color_code('No annotation')],
    }
    self.assertDictEqual(self.gene_stats.taxonomy_repartition(), expected_dict)
def test_taxonomy_repartition(self):
    """One gene linked to the shared phylum yields a single-entry repartition."""
    # Uses the phylum created in the class-level fixture; only the row matters.
    GeneFactory.create(taxonomy=self.phylum)
    expected_dict = {
        'labels': [self.phylum.name],
        'counts': [1],
        # The colour is derived from the label rather than hard-coded.
        'colors': [generate_color_code(self.phylum.name)],
    }
    self.assertDictEqual(self.gene_stats.taxonomy_repartition(), expected_dict)
def test_taxonomy_counts_class_level(self):
    """One gene linked to the shared class entry is reported at level='class'."""
    # Uses the class-rank taxonomy created in the class-level fixture.
    GeneFactory.create(taxonomy=self.class_tax)
    expected_dict = {
        'labels': [self.class_tax.name],
        'counts': [1],
        # The colour is derived from the label rather than hard-coded.
        'colors': [generate_color_code(self.class_tax.name)],
    }
    self.assertDictEqual(
        self.gene_stats.taxonomy_repartition(level='class'), expected_dict
    )
class TestPresentTaxonomy(BaseTestTaxonomy):
    """Tests for ``GeneStatistics.present_taxonomy``."""

    def test_present_taxonomy_no_content(self):
        """Without any gene, both result lists are empty."""
        expected_dict = {
            'tax_ids': [],
            'tax_names': [],
        }
        self.assertDictEqual(self.gene_stats.present_taxonomy(), expected_dict)

    def test_present_taxonomy(self):
        """A single gene linked to the phylum reports that phylum."""
        GeneFactory.create(taxonomy=self.phylum)
        expected_dict = {
            'tax_ids': [self.phylum.tax_id],
            'tax_names': [self.phylum.name],
        }
        self.assertDictEqual(self.gene_stats.present_taxonomy(), expected_dict)

    def test_present_taxonomy_multiple_genes(self):
        """Several genes sharing one taxonomy report it only once."""
        # Create 10 genes with the same taxonomy
        GeneFactory.create_batch(10, taxonomy=self.phylum)
        expected_dict = {
            'tax_ids': [self.phylum.tax_id],
            'tax_names': [self.phylum.name],
        }
        self.assertDictEqual(self.gene_stats.present_taxonomy(), expected_dict)

    def test_present_taxonomy_class_level(self):
        """A gene linked to the class entry is reported when level='class'."""
        GeneFactory.create(taxonomy=self.class_tax)
        expected_dict = {
            'tax_ids': [self.class_tax.tax_id],
            'tax_names': [self.class_tax.name],
        }
        self.assertDictEqual(
            self.gene_stats.present_taxonomy(level='class'), expected_dict
        )
class
TestCounts
(
BaseTestGeneStatistics
):
@
classmethod
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment