Skip to content
Snippets Groups Projects
Commit 04881127 authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion :recycle:
Browse files

Add method to build local mapping for genus and phylum

parent 52f4fa56
No related branches found
No related tags found
2 merge requests: !59 Prod, !19 Improve the way mapping between taxonomy names and id is done during IGC gene creation
from factory import DjangoModelFactory, fuzzy from factory import DjangoModelFactory, fuzzy, Faker
from faker import Factory from faker import Factory
from metagenedb.apps.catalog import models from metagenedb.apps.catalog import models
...@@ -16,3 +16,4 @@ class TaxonomyFactory(DjangoModelFactory): ...@@ -16,3 +16,4 @@ class TaxonomyFactory(DjangoModelFactory):
rank = fuzzy.FuzzyChoice(SELECTED_RANK) rank = fuzzy.FuzzyChoice(SELECTED_RANK)
tax_id = FuzzyLowerText(prefix='tax-', length=15) tax_id = FuzzyLowerText(prefix='tax-', length=15)
name = fuzzy.FuzzyText(length=20)
...@@ -34,6 +34,23 @@ class ImportIGCGenes(object): ...@@ -34,6 +34,23 @@ class ImportIGCGenes(object):
self.skip_tax = skip_tax self.skip_tax = skip_tax
self.skip_functions = skip_functions self.skip_functions = skip_functions
def _build_taxo_mapping(self, rank):
    """Return a ``{name: tax_id}`` dict for every taxonomy entry of *rank*.

    Pages are requested from ``self.metagenedb_taxonomy_api`` one after the
    other, starting at page 1, until a page reports ``next`` as ``None``.
    When several entries share a name, the one fetched last wins.
    """
    name_to_tax_id = {}
    page_number = 1
    while True:
        page = self.metagenedb_taxonomy_api.get_all(params={'page': page_number, 'rank': rank})
        is_last_page = page['next'] is None
        for entry in page['results']:
            name_to_tax_id[entry['name']] = entry['tax_id']
        if is_last_page:
            break
        page_number += 1
    return name_to_tax_id
def build_mapping(self):
    """Populate ``self.phylum_mapping`` and ``self.genus_mapping``.

    Each attribute receives the dict returned by
    ``self._build_taxo_mapping`` for the matching rank; phylum is built
    first, then genus.
    """
    for rank in ("phylum", "genus"):
        setattr(self, rank + "_mapping", self._build_taxo_mapping(rank))
def _reset_counters(self): def _reset_counters(self):
self.processed_genes = 0 self.processed_genes = 0
self.created_genes = 0 self.created_genes = 0
...@@ -89,6 +106,8 @@ class ImportIGCGenes(object): ...@@ -89,6 +106,8 @@ class ImportIGCGenes(object):
return gene_dict return gene_dict
def load_annotation_file_to_db_in_chunks(self, chunk_size=1000, test=False): def load_annotation_file_to_db_in_chunks(self, chunk_size=1000, test=False):
# Build mapping for different phylum and genus
self.build_mapping()
with open(self.annotation_file, 'r') as file: with open(self.annotation_file, 'r') as file:
while True: while True:
chunk_genes = list(islice(file, chunk_size)) chunk_genes = list(islice(file, chunk_size))
......
...@@ -116,12 +116,15 @@ class TestCleanGene(TestCase): ...@@ -116,12 +116,15 @@ class TestCleanGene(TestCase):
class TestSelectTaxonomy(APITestCase): class TestSelectTaxonomy(APITestCase):
@classmethod
def setUpTestData(cls):
    """Create the class-level fixtures: three names plus one genus and one phylum taxonomy."""
    cls.genus_name, cls.phylum_name, cls.unknown_name = 'Genus', 'Phylum', 'unknown'
    # 'unknown' gets no taxonomy entry here; only the genus and phylum names do.
    cls.genus = TaxonomyFactory(name=cls.genus_name, rank="genus")
    cls.phylum = TaxonomyFactory(name=cls.phylum_name, rank="phylum")
def setUp(self): def setUp(self):
self.genus_name = 'Genus'
self.phylum_name = 'Phylum'
self.unknown_name = 'unknown'
self.genus = TaxonomyFactory(rank="genus", name=self.genus_name)
self.phylum = TaxonomyFactory(rank="phylum", name=self.phylum_name)
self.import_igc_genes = ImportIGCGenes('test', 'test') self.import_igc_genes = ImportIGCGenes('test', 'test')
self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client) self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock
...@@ -184,3 +187,27 @@ class TestSelectTaxonomy(APITestCase): ...@@ -184,3 +187,27 @@ class TestSelectTaxonomy(APITestCase):
} }
tested_dict = self.import_igc_genes._select_taxonomy(gene_dict) tested_dict = self.import_igc_genes._select_taxonomy(gene_dict)
self.assertDictEqual(tested_dict, expected_dict) self.assertDictEqual(tested_dict, expected_dict)
class TestBuildTaxoMapping(APITestCase):
    """Check the genus and phylum mappings produced by ``ImportIGCGenes.build_mapping``."""

    @classmethod
    def setUpTestData(cls):
        """Create 200 genus and 20 phylum taxonomy entries shared by the class."""
        # NOTE(review): 200 genus entries presumably span several API pages - confirm against the page size.
        cls.genus_items = TaxonomyFactory.create_batch(200, rank='genus')
        cls.phylum_items = TaxonomyFactory.create_batch(20, rank='phylum')

    def setUp(self):
        """Build an importer whose taxonomy API is the mock wrapping the test client."""
        self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
        self.import_igc_genes = ImportIGCGenes('test', 'test')
        self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock

    @staticmethod
    def _expected_mapping(items):
        """Return the ``{name: tax_id}`` dict expected for *items*."""
        return {item.name: item.tax_id for item in items}

    def test_build_mapping(self):
        """Both mappings must equal the name -> tax_id pairs of the created entries."""
        expected_genus_dict = self._expected_mapping(self.genus_items)
        expected_phylum_dict = self._expected_mapping(self.phylum_items)
        importer = self.import_igc_genes
        importer.build_mapping()
        self.assertDictEqual(importer.genus_mapping, expected_genus_dict)
        self.assertDictEqual(importer.phylum_mapping, expected_phylum_dict)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment