Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
220b598c
Commit
220b598c
authored
Nov 19, 2019
by
Kenzo-Hugo Hillion
♻
Browse files
modify handling of taxonomy
parent
69643d1f
Changes
2
Hide whitespace changes
Inline
Side-by-side
backend/scripts/populate_db/import_igc_data.py
View file @
220b598c
...
...
@@ -65,9 +65,7 @@ class ImportIGCGenes(object):
if
len
(
resp_dict
[
'results'
])
>
1
:
logger
.
warning
(
f
"More than 1 result found for phylum
{
phylum
}
. First result is kept."
)
if
resp_dict
.
get
(
'count'
,
0
)
>
0
:
gene_dict
.
update
(
{
'taxonomy'
:
resp_dict
[
'results'
][
0
][
'tax_id'
]}
)
gene_dict
.
update
({
'taxonomy'
:
resp_dict
[
'results'
][
0
][
'tax_id'
]})
return
gene_dict
def
_parse_gene
(
self
,
raw_line
,
selected_keys
=
SELECTED_KEYS
):
...
...
@@ -83,8 +81,10 @@ class ImportIGCGenes(object):
gene_dict
[
'gene_name'
]
=
gene_dict
[
'gene_id'
]
gene_dict
[
'gene_id'
]
=
slugify
(
gene_dict
[
'gene_id'
])
gene_dict
[
'functions'
]
=
gene_dict
.
pop
(
'kegg_ko'
)
if
self
.
skip_tax
:
if
gene_dict
.
get
(
'taxonomy'
,
None
)
==
'unknown'
or
self
.
skip_tax
:
gene_dict
.
pop
(
'taxonomy'
)
else
:
gene_dict
=
self
.
_select_taxonomy
(
gene_dict
)
if
self
.
skip_functions
or
'unknown'
in
gene_dict
[
'functions'
]:
gene_dict
.
pop
(
'functions'
)
return
gene_dict
...
...
@@ -95,7 +95,7 @@ class ImportIGCGenes(object):
chunk_genes
=
list
(
islice
(
file
,
chunk_size
))
if
not
chunk_genes
:
break
genes
=
[
self
.
_clean_gene
(
self
.
_
select_taxonomy
(
self
.
_
parse_gene
(
i
))
)
for
i
in
chunk_genes
]
genes
=
[
self
.
_clean_gene
(
self
.
_parse_gene
(
i
))
for
i
in
chunk_genes
]
try
:
response
=
self
.
metagenedb_gene_api
.
put
(
genes
)
self
.
created_genes
+=
response
.
get
(
'created'
).
get
(
'count'
)
...
...
backend/scripts/populate_db/test_import_igc_data.py
View file @
220b598c
...
...
@@ -72,6 +72,7 @@ class TestCleanGene(TestCase):
def
setUp
(
self
):
self
.
import_igc_genes
=
ImportIGCGenes
(
'test'
,
'test'
)
self
.
import_igc_genes
.
_select_taxonomy
=
lambda
x
:
x
# Mock to return same dict
self
.
gene_dict
=
{
'gene_id'
:
'gene.01'
,
'length'
:
135
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment