Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
05fddb8e
Commit
05fddb8e
authored
Aug 27, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Use API in IGC import script
parent
1f715857
Pipeline
#13987
passed with stages
in 1 minute and 40 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/api/catalog/filters/taxonomy.py
View file @
05fddb8e
...
...
@@ -6,4 +6,4 @@ class TaxonomyFilter(filters.FilterSet):
class
Meta
:
model
=
Taxonomy
fields
=
[
'rank'
]
fields
=
[
'rank'
,
'name'
]
backend/metagenedb/api/catalog/views/taxonomy.py
View file @
05fddb8e
from
django_filters
import
rest_framework
as
filters
from
rest_framework.viewsets
import
ModelViewSet
from
metagenedb.api.catalog.filters
import
TaxonomyFilter
...
...
backend/metagenedb/apps/catalog/migrations/0009_meta_taxonomy.py
0 → 100644
View file @
05fddb8e
# Generated by Django 2.2.4 on 2019-08-27 11:05
from
django.db
import
migrations
class Migration(migrations.Migration):
    # Updates the model options recorded for the `taxonomy` model:
    # default ordering and the plural verbose name.

    # Depends on the catalog app's 0008_gene_id_slug migration.
    dependencies = [
        ('catalog', '0008_gene_id_slug'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='taxonomy',
            # Order by tax_id descending; plural verbose name is "Taxonomy".
            options={'ordering': ['-tax_id'], 'verbose_name_plural': 'Taxonomy'},
        ),
    ]
backend/metagenedb/apps/catalog/serializers/gene.py
View file @
05fddb8e
...
...
@@ -25,6 +25,13 @@ class GeneSerializer(serializers.ModelSerializer):
model
=
Gene
fields
=
(
'gene_id'
,
'gene_name'
,
'length'
,
'functions'
,
'taxonomy'
)
def
_extract_many_to_many
(
self
,
validated_data
,
info
):
many_to_many
=
{}
for
field_name
,
relation_info
in
info
.
relations
.
items
():
if
relation_info
.
to_many
and
(
field_name
in
validated_data
):
many_to_many
[
field_name
]
=
validated_data
.
pop
(
field_name
)
return
many_to_many
def
_handle_functions
(
self
,
functions
,
instance
):
for
function
in
functions
:
try
:
...
...
@@ -36,17 +43,10 @@ class GeneSerializer(serializers.ModelSerializer):
_LOGGER
.
warning
(
f
"
{
function
.
get
(
'function_id'
)
}
not found for
{
instance
.
gene_id
}
. Function ignored"
)
def
create
(
self
,
validated_data
):
ModelClass
=
self
.
Meta
.
model
# Remove many-to-many relationships from validated_data.
# They are not valid arguments to the default `.create()` method,
# as they require that the instance has already been saved.
info
=
model_meta
.
get_field_info
(
ModelClass
)
many_to_many
=
{}
for
field_name
,
relation_info
in
info
.
relations
.
items
():
if
relation_info
.
to_many
and
(
field_name
in
validated_data
):
many_to_many
[
field_name
]
=
validated_data
.
pop
(
field_name
)
# Remove many-to-many relationships from validated_data.
many_to_many
=
self
.
_extract_many_to_many
(
validated_data
,
info
)
try
:
instance
=
ModelClass
.
_default_manager
.
create
(
**
validated_data
)
...
...
@@ -70,9 +70,29 @@ class GeneSerializer(serializers.ModelSerializer):
)
raise
TypeError
(
msg
)
# Save many-to-many relationships after the instance is created.
print
(
many_to_many
)
# Link existing many-to-many relationships after the instance is created.
if
many_to_many
:
for
field_name
,
value
in
many_to_many
.
items
():
getattr
(
self
,
f
'_handle_
{
field_name
}
'
,
None
)(
value
,
instance
)
return
instance
def update(self, instance, validated_data):
    """
    Update `instance` from validated_data and return it.

    To-many relations are taken out of validated_data first, the remaining
    values are assigned as plain attributes, then each to-many relation is
    linked through the serializer's `_handle_<field_name>` method.
    The instance is saved once everything has been applied.
    """
    info = model_meta.get_field_info(self.Meta.model)

    # Remove many-to-many relationships from validated_data.
    many_to_many = self._extract_many_to_many(validated_data, info)

    # Every to-many key was popped above, so only plain attributes remain.
    for attr, value in validated_data.items():
        setattr(instance, attr, value)

    # Link existing many-to-many relationships.
    for field_name, value in many_to_many.items():
        handler = getattr(self, f'_handle_{field_name}', None)
        if handler is not None:
            handler(value, instance)
        else:
            # No dedicated handler: set the related manager directly instead
            # of calling None (which raised an opaque TypeError).
            getattr(instance, field_name).set(value)

    instance.save()
    return instance
backend/metagenedb/common/utils/api/__init__.py
View file @
05fddb8e
from
.togows
import
TogoWSEntryAPI
# noqa
from
.metagenedb
import
MetageneDBCatalogGeneAPI
# noqa
from
.metagenedb
import
(
MetageneDBCatalogGeneAPI
,
MetageneDBCatalogTaxonomyAPI
,
# noqa
MetageneDBCatalogFunctionAPI
)
backend/metagenedb/common/utils/api/baseapi.py
View file @
05fddb8e
...
...
@@ -33,8 +33,8 @@ class BaseAPI(object):
self
.
session
=
self
.
SESSION
()
self
.
session
.
headers
.
update
(
self
.
HEADERS
)
def get_all(self, params=None):
    """
    Fetch the collection served at self.url and return the decoded JSON body.

    `params` is an optional mapping forwarded as query parameters to the
    session's get(); None sends the request without any filter.
    Error responses surface through response.raise_for_status()
    (presumably requests' HTTPError - confirm against SESSION).
    """
    response = self.session.get(self.url, params=params)
    response.raise_for_status()
    return response.json()
...
...
backend/metagenedb/common/utils/api/metagenedb.py
View file @
05fddb8e
...
...
@@ -11,3 +11,11 @@ class MetageneDBAPI(BaseAPI):
class MetageneDBCatalogGeneAPI(MetageneDBAPI):
    # Route of the catalog v1 genes endpoint.
    # Presumably joined to the base URL by the parent API class - confirm.
    ROUTE = 'api/catalog/v1/genes/'
class MetageneDBCatalogTaxonomyAPI(MetageneDBAPI):
    # Route of the catalog v1 taxonomy endpoint.
    # Presumably joined to the base URL by the parent API class - confirm.
    ROUTE = 'api/catalog/v1/taxonomy/'
class MetageneDBCatalogFunctionAPI(MetageneDBAPI):
    # Route of the catalog v1 functions endpoint.
    # Presumably joined to the base URL by the parent API class - confirm.
    ROUTE = 'api/catalog/v1/functions/'
backend/metagenedb/common/utils/mocks/__init__.py
0 → 100644
View file @
05fddb8e
backend/metagenedb/common/utils/mocks/metagenedb.py
0 → 100644
View file @
05fddb8e
from
requests.exceptions
import
HTTPError
from
django.urls
import
reverse
from
django.utils.http
import
urlencode
from
metagenedb.common.utils.api
import
MetageneDBCatalogGeneAPI
class MetageneDBAPIMock(MetageneDBCatalogGeneAPI):
    """
    Stand-in for the MetageneDB API classes that sends every call through a
    test client instead of a real HTTP session. Meant for testing the upsert
    behaviour, not the insertion into the db.

    Subclasses set KEY_ID (URL kwarg of the detail route) and REVERSE_PATH
    (namespace of the routes, without the BASE_REVERSE prefix).
    """
    KEY_ID = ''
    BASE_REVERSE = 'api'
    REVERSE_PATH = ''

    def __init__(self, client):
        # The parent initialiser is deliberately not called here.
        self.client = client
        self.reverse_path = ':'.join((self.BASE_REVERSE, self.REVERSE_PATH))

    def get_all(self, params=None):
        """
        GET the list route, with `params` appended as a query string when given.
        """
        target = reverse(f'{self.reverse_path}-list')
        if params is not None:
            target = f"{target}?{urlencode(params)}"
        return self.client.get(target).json()

    def get(self, entry_id):
        """
        GET one entry; a 404 answer is turned into an HTTPError.
        """
        detail_url = reverse(f'{self.reverse_path}-detail', kwargs={self.KEY_ID: entry_id})
        answer = self.client.get(detail_url)
        if answer.status_code == 404:
            raise HTTPError
        return answer.json()

    def post(self, data):
        """
        POST `data` as JSON to the list route; a 400 answer is turned into an HTTPError.
        """
        list_url = reverse(f'{self.reverse_path}-list')
        answer = self.client.post(list_url, data, format='json')
        if answer.status_code == 400:
            raise HTTPError
        return answer.json()

    def put(self, entry_id, data):
        """
        PUT `data` as JSON on one entry and return the decoded answer.
        """
        detail_url = reverse(f'{self.reverse_path}-detail', kwargs={self.KEY_ID: entry_id})
        answer = self.client.put(detail_url, data, format='json')
        return answer.json()
class MetageneDBCatalogGeneAPIMock(MetageneDBAPIMock):
    # Mock bound to the gene routes: 'api:catalog:v1:genes-list' / '-detail'.
    # KEY_ID is the URL kwarg passed to reverse() for the detail route.
    KEY_ID = 'gene_id'
    REVERSE_PATH = 'catalog:v1:genes'
class MetageneDBCatalogTaxonomyAPIMock(MetageneDBAPIMock):
    # Mock bound to the taxonomy routes: 'api:catalog:v1:taxonomy-list' / '-detail'.
    # NOTE(review): KEY_ID is the same 'gene_id' used by the gene mock, which
    # looks like a copy-paste. get()/put() pass it as the URL kwarg to reverse(),
    # so confirm the kwarg name of the taxonomy detail route (possibly 'tax_id').
    KEY_ID = 'gene_id'
    REVERSE_PATH = 'catalog:v1:taxonomy'
backend/scripts/populate_db/import_igc_data.py
View file @
05fddb8e
#!/usr/bin/env python
import
argparse
import
logging
import
os
import
sys
from
itertools
import
islice
from
requests.exceptions
import
HTTPError
import
django
from
slugify
import
slugify
from
metagenedb.common.utils.api
import
MetageneDBCatalogGeneAPI
from
metagenedb.common.utils.api
import
MetageneDBCatalogGeneAPI
,
MetageneDBCatalogTaxonomyAPI
from
metagenedb.common.utils.parsers
import
IGCLineParser
# Before importing models, we need to call django.setup() to load the apps
os
.
environ
.
setdefault
(
"DJANGO_SETTINGS_MODULE"
,
"metagenedb.settings"
)
django
.
setup
()
from
metagenedb.apps.catalog.models
import
Taxonomy
# noqa
logging
.
basicConfig
(
level
=
logging
.
INFO
)
_LOGGER
=
logging
.
getLogger
(
__name__
)
# Keys of the parsed gene dict that hold the phylum and genus names.
PHYLUM_COL = 'taxo_phylum'
GENUS_COL = 'taxo_genus'
# Keys kept by default from the dict returned by IGCLineParser.
SELECTED_KEYS = ['gene_id', 'length', 'kegg_ko', PHYLUM_COL, GENUS_COL]
def parse_gene(raw_line, selected_keys=SELECTED_KEYS):
    """
    Parse one raw line with IGCLineParser and keep only the entries whose
    key is listed in selected_keys
    """
    parsed_line = IGCLineParser().gene(raw_line)
    kept = {}
    for key, value in parsed_line.items():
        if key in selected_keys:
            kept[key] = value
    return kept
def select_taxonomy(gene_dict, unknown_val='unknown'):
    """
    Select the taxonomy to be assigned for the gene.
    genus has priority on phylum. If both are unknown, no taxonomy key is set.

    The phylum and genus columns are always removed from gene_dict, which is
    modified in place and returned. When the selected name has no matching
    Taxonomy entry, a warning is logged and the taxonomy key is left out.
    """
    phylum = gene_dict.pop(PHYLUM_COL)
    genus = gene_dict.pop(GENUS_COL)
    if genus != unknown_val:
        name, rank = genus, "genus"
    elif phylum != unknown_val:
        name, rank = phylum, "phylum"
    else:
        return gene_dict

    queryset = Taxonomy.objects.filter(name=name, rank=rank)
    try:
        first_match = queryset[0]
    except IndexError:
        # Nothing in the db for this name: skip the link instead of crashing.
        _LOGGER.warning(f"No result found for {rank} {name}. Taxonomy ignored.")
        return gene_dict
    if queryset.count() > 1:
        _LOGGER.warning(f"More than 1 result found for {rank} {name}. First result is kept.")
    gene_dict.update({'taxonomy': first_match.tax_id})
    return gene_dict
class
ImportIGCGenes
(
object
):
METAGENEDB_GENE_API
=
MetageneDBCatalogGeneAPI
METAGENEDB_TAXONOMY_API
=
MetageneDBCatalogTaxonomyAPI
PHYLUM_COL
=
'taxo_phylum'
GENUS_COL
=
'taxo_genus'
SELECTED_KEYS
=
[
'gene_id'
,
'length'
,
'kegg_ko'
,
PHYLUM_COL
,
GENUS_COL
]
def
__init__
(
self
,
annotation_file
,
url
,
skip_tax
=
False
,
skip_functions
=
False
):
self
.
annotation_file
=
annotation_file
self
.
url
=
url
self
.
metagenedb_gene_api
=
self
.
METAGENEDB_GENE_API
(
base_url
=
self
.
url
)
self
.
metagenedb_taxonomy_api
=
self
.
METAGENEDB_TAXONOMY_API
(
base_url
=
self
.
url
)
# Skip some insertion if specified in script options
self
.
skip_tax
=
skip_tax
self
.
skip_functions
=
skip_functions
def _parse_gene(self, raw_line, selected_keys=SELECTED_KEYS):
    """
    Parse one raw line with IGCLineParser and keep only the entries whose
    key is listed in selected_keys
    """
    parsed_line = IGCLineParser().gene(raw_line)
    kept = {}
    for key, value in parsed_line.items():
        if key in selected_keys:
            kept[key] = value
    return kept
def
_select_taxonomy
(
self
,
taxonomy_dict
,
unknown_val
=
'unknown'
):
"""
Select the taxonomy to be assigned for the gene.
genus has priority on phylum. If both unknow, remove the taxonomy key
"""
phylum
=
taxonomy_dict
.
pop
(
self
.
PHYLUM_COL
)
genus
=
taxonomy_dict
.
pop
(
self
.
GENUS_COL
)
resp_dict
=
{}
if
genus
!=
unknown_val
:
resp_dict
=
self
.
metagenedb_taxonomy_api
.
get_all
(
params
=
{
'name'
:
genus
,
'rank'
:
'genus'
})
if
len
(
resp_dict
[
'results'
])
>
1
:
_LOGGER
.
warning
(
f
"More than 1 result found for genus
{
genus
}
. First result is kept."
)
elif
phylum
!=
unknown_val
:
resp_dict
=
self
.
metagenedb_taxonomy_api
.
get_all
(
params
=
{
'name'
:
phylum
,
'rank'
:
'phylum'
})
if
len
(
resp_dict
[
'results'
])
>
1
:
_LOGGER
.
warning
(
f
"More than 1 result found for phylum
{
phylum
}
. First result is kept."
)
if
resp_dict
:
taxonomy_dict
.
update
(
{
'taxonomy'
:
resp_dict
[
'results'
][
0
][
'tax_id'
]}
)
return
taxonomy_dict
def
_clean_gene
(
self
,
gene_dict
):
print
(
gene_dict
)
gene_dict
[
'gene_name'
]
=
gene_dict
[
'gene_id'
]
gene_dict
[
'gene_id'
]
=
slugify
(
gene_dict
[
'gene_id'
])
gene_dict
[
'functions'
]
=
[{
'function_id'
:
gene_dict
.
pop
(
'kegg_ko'
)}]
...
...
@@ -93,8 +83,8 @@ class ImportIGCGenes(object):
def
_insert_gene_list
(
self
,
chunk_genes
):
for
gene_line
in
chunk_genes
:
gene_dict
=
parse_gene
(
gene_line
)
gene_dict_with_taxo
=
select_taxonomy
(
gene_dict
)
gene_dict
=
self
.
_
parse_gene
(
gene_line
)
gene_dict_with_taxo
=
self
.
_
select_taxonomy
(
gene_dict
)
try
:
self
.
_upsert_gene
(
gene_dict_with_taxo
)
except
HTTPError
as
e
:
...
...
backend/scripts/populate_db/test_import_igc_data.py
View file @
05fddb8e
from
requests.exceptions
import
HTTPError
from
unittest
import
TestCase
import
pytest
from
django.urls
import
reverse
from
rest_framework.test
import
APITestCase
from
metagenedb.common.utils.api
import
MetageneDBCatalogGeneAPI
from
metagenedb.common.utils.mocks.metagenedb
import
(
MetageneDBCatalogGeneAPIMock
,
MetageneDBCatalogTaxonomyAPIMock
)
from
metagenedb.apps.catalog.factory
import
TaxonomyFactory
from
scripts.populate_db.import_igc_data
import
parse_gene
,
select_taxonomy
,
ImportIGCGenes
from
scripts.populate_db.import_igc_data
import
ImportIGCGenes
class
TestParseGene
(
TestCase
):
...
...
@@ -30,6 +29,7 @@ class TestParseGene(TestCase):
'cohort_assembled'
]
self
.
raw_line
=
"
\t
"
.
join
(
raw_data
)
self
.
import_igc_genes
=
ImportIGCGenes
(
'test'
,
'test'
)
def
test_parse_gene_default_selected_keys
(
self
):
"""
...
...
@@ -42,7 +42,7 @@ class TestParseGene(TestCase):
'taxo_phylum'
:
'taxo_phylum'
,
'taxo_genus'
:
'taxo_genus'
,
}
tested_dict
=
parse_gene
(
self
.
raw_line
)
tested_dict
=
self
.
import_igc_genes
.
_
parse_gene
(
self
.
raw_line
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
def
test_parse_gene
(
self
):
...
...
@@ -54,7 +54,7 @@ class TestParseGene(TestCase):
'gene_id'
:
'gene_name'
,
'length'
:
'length'
}
tested_dict
=
parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
tested_dict
=
self
.
import_igc_genes
.
_
parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
def
test_parse_gene_unknown_key
(
self
):
...
...
@@ -66,40 +66,10 @@ class TestParseGene(TestCase):
'gene_id'
:
'gene_name'
,
'length'
:
'length'
}
tested_dict
=
parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
tested_dict
=
self
.
import_igc_genes
.
_
parse_gene
(
self
.
raw_line
,
selected_keys
=
selected_keys
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
class
MetageneDBCatalogGeneAPIMock
(
MetageneDBCatalogGeneAPI
):
"""
Just a simple mock to go through the Test client. The idea is to test the upsert behaviour and not
the insertion to the db.
"""
def
__init__
(
self
,
client
):
self
.
client
=
client
self
.
reverse_path
=
'api:catalog:v1:genes'
def
get_all
(
self
):
return
self
.
client
.
get
(
reverse
(
f
'
{
self
.
reverse_path
}
-list'
)).
json
()
def
get
(
self
,
entry_id
):
response
=
self
.
client
.
get
(
reverse
(
f
'
{
self
.
reverse_path
}
-detail'
,
kwargs
=
{
'gene_id'
:
entry_id
}))
if
response
.
status_code
==
404
:
raise
HTTPError
return
response
.
json
()
def
post
(
self
,
data
):
response
=
self
.
client
.
post
(
reverse
(
f
'
{
self
.
reverse_path
}
-list'
),
data
,
format
=
'json'
)
if
response
.
status_code
==
400
:
raise
HTTPError
return
response
.
json
()
def
put
(
self
,
entry_id
,
data
):
return
self
.
client
.
put
(
reverse
(
f
'
{
self
.
reverse_path
}
-detail'
,
kwargs
=
{
'gene_id'
:
entry_id
}),
data
,
format
=
'json'
).
json
()
class
TestUpsertGene
(
APITestCase
):
def
setUp
(
self
):
...
...
@@ -191,8 +161,7 @@ class TestCleanGene(TestCase):
self
.
assertDictEqual
(
test_gene_dict
,
expected_gene_dict
)
@
pytest
.
mark
.
django_db
class
TestSelectTaxonomy
(
TestCase
):
class
TestSelectTaxonomy
(
APITestCase
):
def
setUp
(
self
):
self
.
genus_name
=
'Genus'
...
...
@@ -200,6 +169,9 @@ class TestSelectTaxonomy(TestCase):
self
.
unknown_name
=
'unknown'
self
.
genus
=
TaxonomyFactory
(
rank
=
"genus"
,
name
=
self
.
genus_name
)
self
.
phylum
=
TaxonomyFactory
(
rank
=
"phylum"
,
name
=
self
.
phylum_name
)
self
.
import_igc_genes
=
ImportIGCGenes
(
'test'
,
'test'
)
self
.
api_mock
=
MetageneDBCatalogTaxonomyAPIMock
(
self
.
client
)
self
.
import_igc_genes
.
metagenedb_taxonomy_api
=
self
.
api_mock
def
test_genus_only
(
self
):
gene_dict
=
{
...
...
@@ -213,7 +185,7 @@ class TestSelectTaxonomy(TestCase):
'length'
:
135
,
'taxonomy'
:
str
(
self
.
genus
.
tax_id
)
}
tested_dict
=
select_taxonomy
(
gene_dict
)
tested_dict
=
self
.
import_igc_genes
.
_
select_taxonomy
(
gene_dict
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
def
test_phylum_only
(
self
):
...
...
@@ -228,7 +200,7 @@ class TestSelectTaxonomy(TestCase):
'length'
:
135
,
'taxonomy'
:
str
(
self
.
phylum
.
tax_id
)
}
tested_dict
=
select_taxonomy
(
gene_dict
)
tested_dict
=
self
.
import_igc_genes
.
_
select_taxonomy
(
gene_dict
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
def
test_genus_phylum
(
self
):
...
...
@@ -243,7 +215,7 @@ class TestSelectTaxonomy(TestCase):
'length'
:
135
,
'taxonomy'
:
str
(
self
.
genus
.
tax_id
)
}
tested_dict
=
select_taxonomy
(
gene_dict
)
tested_dict
=
self
.
import_igc_genes
.
_
select_taxonomy
(
gene_dict
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
def
test_both_unknown
(
self
):
...
...
@@ -257,5 +229,5 @@ class TestSelectTaxonomy(TestCase):
'gene_id'
:
'gene'
,
'length'
:
135
}
tested_dict
=
select_taxonomy
(
gene_dict
)
tested_dict
=
self
.
import_igc_genes
.
_
select_taxonomy
(
gene_dict
)
self
.
assertDictEqual
(
tested_dict
,
expected_dict
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment