Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Metagenomics
metagenedb
Commits
eb644605
Commit
eb644605
authored
Jun 30, 2020
by
Kenzo-Hugo Hillion
♻
Browse files
start script to create light db
parent
fce01755
Pipeline
#33212
failed with stages
in 3 minutes and 4 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
backend/metagenedb/apps/catalog/factory/taxonomy.py
View file @
eb644605
from
collections
import
OrderedDict
from
factory
import
DjangoModelFactory
,
fuzzy
from
faker
import
Factory
...
...
@@ -25,44 +27,47 @@ class DbGenerator:
self
.
created_ids
=
set
()
# store already created IDs to skip them
def generate_db_from_tree(self, tree):
    """
    Create the Taxonomy entries described by ``tree`` and chain them together.

    The tree needs to be an OrderedDict going from the higher to the lower
    level: every newly created entry receives the previously created entry
    as its parent.

    :param tree: mapping of rank -> {'tax_id': ..., 'name': ...}
    """
    for rank, desc in tree.items():
        tax_id = desc['tax_id']
        # IDs created by a previous call are skipped, not created twice.
        if tax_id in self.created_ids:
            continue
        # NOTE(review): when an entry is skipped, self.last_tax still refers
        # to the last entry that was actually created (possibly from another
        # tree), and that entry becomes the parent here -- confirm intended.
        self.last_tax = TaxonomyFactory.create(
            tax_id=tax_id,
            name=desc['name'],
            rank=rank,
            parent=getattr(self, "last_tax", None)
        )
        self.created_ids.add(tax_id)
    # NOTE(review): assumed to run once, after the loop, on the last created
    # entry; raises AttributeError if nothing was ever created -- confirm.
    self.last_tax.build_hierarchy()
def _generate_lactobacillus_db(db_generator):
    """
    Generate db with few ranks corresponding to Lactobacillus genus

    The ranks are listed from the highest to the lowest level and handed
    over to ``db_generator.generate_db_from_tree`` as an OrderedDict.

    :param db_generator: object exposing ``generate_db_from_tree(tree)``
    """
    # Built in a single expression; insertion order is the rank order.
    tree = OrderedDict([
        ('no_rank', {"name": "root", "tax_id": "1"}),
        ("superkingdom", {"name": "Bacteria", "tax_id": "2"}),
        ("phylum", {"name": "Firmicutes", "tax_id": "1239"}),
        ("class", {"name": "Bacilli", "tax_id": "91061"}),
        ("order", {"name": "Lactobacillales", "tax_id": "186826"}),
        ("family", {"name": "Lactobacillaceae", "tax_id": "33958"}),
        ("genus", {"name": "Lactobacillus", "tax_id": "1578"}),
        ("species_group", {"name": "Lactobacillus casei group", "tax_id": "655183"}),
    ])
    db_generator.generate_db_from_tree(tree)
def _generate_escherichia_db(db_generator):
    """
    Generate db with few ranks leading to the Escherichia coli species

    The ranks are listed from the highest to the lowest level and handed
    over to ``db_generator.generate_db_from_tree`` as an OrderedDict.

    :param db_generator: object exposing ``generate_db_from_tree(tree)``
    """
    # Built in a single expression; insertion order is the rank order.
    tree = OrderedDict([
        ("no_rank", {"name": "root", "tax_id": "1"}),
        ("superkingdom", {"name": "Bacteria", "tax_id": "2"}),
        ("phylum", {"name": "Proteobacteria", "tax_id": "1224"}),
        ("class", {"name": "Gammaproteobacteria", "tax_id": "1236"}),
        ("order", {"name": "Enterobacterales", "tax_id": "91347"}),
        ("family", {"name": "Enterobacteriaceae", "tax_id": "543"}),
        ("genus", {"name": "Escherichia", "tax_id": "561"}),
        ("species", {"name": "Escherichia coli", "tax_id": "562"}),
    ])
    db_generator.generate_db_from_tree(tree)
...
...
backend/metagenedb/apps/catalog/management/commands/create_light_db.py
0 → 100644
View file @
eb644605
import
logging
from
django.core.management.base
import
BaseCommand
from
metagenedb.apps.catalog.factory.taxonomy
import
generate_simple_db
as
gen_tax_db
from
metagenedb.apps.catalog.models
import
(
Gene
,
KeggOrthology
,
Taxonomy
)
# Configure the root logging format: timestamp, level, logger name, message.
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
# Module-level logger; its level is set by Command.set_logger_level below.
logger = logging.getLogger(__name__)
def create_functions_db():
    """
    Reset the KeggOrthology table and fill it with two example entries.

    Every existing KeggOrthology row is deleted first, then one row is
    saved per item of the hard-coded mapping below.
    """
    KeggOrthology.objects.all().delete()
    keggs_to_create = {
        "K03556": {
            'name': 'malT',
            'long_name': "LuxR family transcriptional regulator, maltose regulon positive regulatory protein"
        },
        "K02229": {
            'name': "cobG",
            'long_name': 'precorrin-3B synthase [EC:1.14.13.83]'
        }
    }
    for kegg_id, values in keggs_to_create.items():
        short_name = values.get('name')
        full_name = values.get('long_name')
        kegg_entry = KeggOrthology(
            function_id=kegg_id,
            name=short_name,
            long_name=full_name,
        )
        kegg_entry.save()
def create_taxonomy_db():
    """
    Delete every Taxonomy entry, then regenerate them through gen_tax_db.
    """
    existing_entries = Taxonomy.objects.all()
    existing_entries.delete()
    gen_tax_db()
def create_genes_db():
    """
    Placeholder for the creation of genes: currently does nothing.
    """
def create_small_db():
    """
    Build the light database: functions first, then taxonomy, then genes.
    """
    build_steps = (create_functions_db, create_taxonomy_db, create_genes_db)
    for build_step in build_steps:
        build_step()
class Command(BaseCommand):
    """
    Management command that creates the light database through create_small_db.
    """

    help = 'Create a light DB with random items to illustrate functionnalities of the application.'

    def set_logger_level(self, verbosity):
        """
        Set the module logger level from the given verbosity.

        Above 2 the level is DEBUG, above 1 it is INFO; for any lower value
        the logger is left untouched.
        """
        if verbosity > 2:
            level = logging.DEBUG
        elif verbosity > 1:
            level = logging.INFO
        else:
            # Nothing to change for low verbosity
            return
        logger.setLevel(level)

    def handle(self, *args, **options):
        """
        Entry point of the command: adjust logging, then build the database.
        """
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        create_small_db()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment