Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
10c4b4ad
Commit
10c4b4ad
authored
Nov 22, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
Check if function exists in the db or remove it from payload
parent
3bd58750
Pipeline
#18637
passed with stages
in 2 minutes and 19 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/serializers/asymetricslugrelatedfield.py
View file @
10c4b4ad
from
collections
import
OrderedDict
from
rest_framework
import
serializers
from
rest_framework
.relations
import
SlugRelatedField
class
AsymetricSlugRelatedField
(
serializers
.
SlugRelatedField
):
class
AsymetricSlugRelatedField
(
SlugRelatedField
):
def
to_representation
(
self
,
value
):
return
self
.
serializer_class
(
value
).
data
...
...
backend/scripts/populate_db/import_igc_data.py
View file @
10c4b4ad
...
...
@@ -5,7 +5,7 @@ import os
import
sys
from
itertools
import
islice
from
bioapi
import
MetageneDBCatalogGeneAPI
,
MetageneDBCatalogTaxonomyAPI
from
bioapi
import
MetageneDBCatalogFunctionAPI
,
MetageneDBCatalogGeneAPI
,
MetageneDBCatalogTaxonomyAPI
from
requests.exceptions
import
HTTPError
from
slugify
import
slugify
...
...
@@ -18,6 +18,7 @@ logger = logging.getLogger()
class
ImportIGCGenes
(
object
):
METAGENEDB_GENE_API
=
MetageneDBCatalogGeneAPI
METAGENEDB_TAXONOMY_API
=
MetageneDBCatalogTaxonomyAPI
METAGENEDB_FUNCTION_API
=
MetageneDBCatalogFunctionAPI
PHYLUM_COL
=
'taxo_phylum'
GENUS_COL
=
'taxo_genus'
...
...
@@ -28,6 +29,7 @@ class ImportIGCGenes(object):
self
.
url
=
url
self
.
metagenedb_gene_api
=
self
.
METAGENEDB_GENE_API
(
base_url
=
self
.
url
)
self
.
metagenedb_taxonomy_api
=
self
.
METAGENEDB_TAXONOMY_API
(
base_url
=
self
.
url
)
self
.
metagenedb_function_api
=
self
.
METAGENEDB_FUNCTION_API
(
base_url
=
self
.
url
)
self
.
total_genes
=
self
.
_get_number_genes
()
self
.
_reset_counters
()
# Skip some insertion if specified in script options
...
...
@@ -53,6 +55,24 @@ class ImportIGCGenes(object):
counter
+=
1
return
mapping
def build_function_catalog(self, page_size=1000):
    """Collect the IDs of all functions exposed by the function API.

    Requests pages from ``self.metagenedb_function_api.get_all`` starting at
    page 1 and keeps going until a response reports ``'next'`` as ``None``.
    The ``'function_id'`` of every entry under ``'results'`` is gathered into
    a set, which is stored on ``self.metagenedb_functions``.

    Args:
        page_size: Value sent as the ``page_size`` request parameter.
    """
    logger.info("Building local function catalog...")
    functions = set()
    page = 1
    while True:
        params = {
            'page': page,
            'page_size': page_size,
        }
        current_page = self.metagenedb_function_api.get_all(params=params)
        next_page = current_page['next']
        # Grow the set in place: no intermediate list/set per page and no
        # re-copy of everything accumulated so far.
        functions.update(item['function_id'] for item in current_page['results'])
        if next_page is None:
            break
        page += 1
    self.metagenedb_functions = functions
def build_mapping(self, page_size=1000):
    """Build the phylum and genus mappings and store them on the instance.

    Calls ``self._build_taxo_mapping`` once per rank and saves the results as
    ``self.phylum_mapping`` and ``self.genus_mapping``.

    Args:
        page_size: Forwarded unchanged to ``_build_taxo_mapping``.
    """
    # Phylum is resolved before genus, one helper call per rank.
    for rank in ("phylum", "genus"):
        mapping = self._build_taxo_mapping(rank, page_size=page_size)
        setattr(self, rank + "_mapping", mapping)
...
...
@@ -102,6 +122,15 @@ class ImportIGCGenes(object):
selected_dict
=
{
k
:
v
for
k
,
v
in
all_dict
.
items
()
if
k
in
selected_keys
}
return
selected_dict
def _clean_functions(self, functions):
    """Return the entries of ``functions`` present in ``self.metagenedb_functions``.

    Order is preserved. Entries missing from the catalog are dropped; a
    warning is logged for each of them unless the entry is the literal
    ``'unknown'``.

    Args:
        functions: Iterable of function identifiers to filter.

    Returns:
        A new list holding only the identifiers found in the catalog.
    """
    kept = []
    for name in functions:
        if name not in self.metagenedb_functions:
            # 'unknown' is dropped without a warning.
            if name != 'unknown':
                logger.warning("Function %s not found in metagenedb", name)
            continue
        kept.append(name)
    return kept
def
_clean_gene
(
self
,
gene_dict
):
gene_dict
[
'gene_name'
]
=
gene_dict
[
'gene_id'
]
gene_dict
[
'gene_id'
]
=
slugify
(
gene_dict
[
'gene_id'
])
...
...
@@ -109,12 +138,15 @@ class ImportIGCGenes(object):
gene_dict
=
self
.
_select_taxonomy
(
gene_dict
)
if
self
.
skip_functions
or
'unknown'
in
gene_dict
[
'functions'
]:
gene_dict
.
pop
(
'functions'
)
else
:
gene_dict
[
'functions'
]
=
self
.
_clean_functions
(
gene_dict
[
'functions'
])
return
gene_dict
def
load_annotation_file_to_db_in_chunks
(
self
,
chunk_size
=
1000
,
test
=
False
):
# Build mapping for different phylum and genus
if
not
self
.
skip_tax
:
self
.
build_mapping
()
if
not
self
.
skip_functions
:
self
.
build_function_catalog
()
with
open
(
self
.
annotation_file
,
'r'
)
as
file
:
while
True
:
chunk_genes
=
list
(
islice
(
file
,
chunk_size
))
...
...
backend/scripts/populate_db/test_import_igc_data.py
View file @
10c4b4ad
...
...
@@ -2,8 +2,8 @@ from unittest import TestCase
from
rest_framework.test
import
APITestCase
from
metagenedb.common.utils.mocks.metagenedb
import
MetageneDBCatalogTaxonomyAPIMock
from
metagenedb.apps.catalog.factory
import
TaxonomyFactory
from
metagenedb.common.utils.mocks.metagenedb
import
MetageneDBCatalogTaxonomyAPIMock
,
MetageneDBCatalogFunctionAPIMock
from
metagenedb.apps.catalog.factory
import
TaxonomyFactory
,
FunctionFactory
from
scripts.populate_db.import_igc_data
import
ImportIGCGenes
...
...
@@ -73,6 +73,7 @@ class TestCleanGene(TestCase):
def
setUp
(
self
):
self
.
import_igc_genes
=
ImportIGCGenes
(
'test'
,
'test'
)
self
.
import_igc_genes
.
_select_taxonomy
=
lambda
x
:
x
# Mock to return same dict
self
.
import_igc_genes
.
_clean_functions
=
lambda
x
:
x
self
.
gene_dict
=
{
'gene_id'
:
'gene.01'
,
'length'
:
135
,
...
...
@@ -240,3 +241,22 @@ class TestBuildTaxoMapping(APITestCase):
self
.
import_igc_genes
.
build_mapping
(
page_size
=
100
)
self
.
assertDictEqual
(
self
.
import_igc_genes
.
genus_mapping
,
expected_genus_dict
)
self
.
assertDictEqual
(
self
.
import_igc_genes
.
phylum_mapping
,
expected_phylum_dict
)
class TestBuildBuildFunctionCatalog(APITestCase):
    """Check that ``build_function_catalog`` collects every function ID."""

    @classmethod
    def setUpTestData(cls):
        # One batch of 100 functions shared by every test of the class.
        cls.functions = FunctionFactory.create_batch(100)

    def setUp(self):
        importer = ImportIGCGenes('test', 'test')
        # Swap the function API for the mock built around the test client
        # (presumably so no real HTTP request is made -- confirm in the mock).
        self.api_mock = MetageneDBCatalogFunctionAPIMock(self.client)
        importer.metagenedb_function_api = self.api_mock
        self.import_igc_genes = importer

    def test_build_catalog(self):
        expected_catalog = {function.function_id for function in self.functions}
        self.import_igc_genes.build_function_catalog(page_size=100)
        built_catalog = self.import_igc_genes.metagenedb_functions
        self.assertSetEqual(built_catalog, expected_catalog)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment