Commit 04881127 authored by Kenzo-Hugo Hillion
Browse files

Add method to build local mapping for genus and phylum

parent 52f4fa56
from factory import DjangoModelFactory, fuzzy
from factory import DjangoModelFactory, fuzzy, Faker
from faker import Factory
from metagenedb.apps.catalog import models
......@@ -16,3 +16,4 @@ class TaxonomyFactory(DjangoModelFactory):
rank = fuzzy.FuzzyChoice(SELECTED_RANK)
tax_id = FuzzyLowerText(prefix='tax-', length=15)
name = fuzzy.FuzzyText(length=20)
......@@ -34,6 +34,23 @@ class ImportIGCGenes(object):
self.skip_tax = skip_tax
self.skip_functions = skip_functions
def _build_taxo_mapping(self, rank):
    """
    Build a local ``{name: tax_id}`` mapping for the taxonomy entries of one rank.

    Pages are requested from ``self.metagenedb_taxonomy_api`` one after the
    other, starting at page 1, until a page reports ``next`` as ``None``.

    Args:
        rank: value sent as the ``rank`` query parameter (e.g. ``"phylum"``).

    Returns:
        dict mapping each returned item's ``name`` to its ``tax_id``. When a
        name occurs more than once, the last item received wins.
    """
    mapping = {}
    page_number = 1
    while True:
        current_page = self.metagenedb_taxonomy_api.get_all(params={'page': page_number, 'rank': rank})
        for value in current_page['results']:
            mapping[value['name']] = value['tax_id']
        # Stop as soon as the API reports that no further page exists.
        if current_page['next'] is None:
            break
        page_number += 1
    return mapping
def build_mapping(self):
    """
    Build the local name -> tax_id mappings for the phylum and genus ranks.

    Stores the results on the instance as ``self.phylum_mapping`` and
    ``self.genus_mapping``; nothing is returned.
    """
    self.phylum_mapping = self._build_taxo_mapping("phylum")
    self.genus_mapping = self._build_taxo_mapping("genus")
def _reset_counters(self):
self.processed_genes = 0
self.created_genes = 0
......@@ -89,6 +106,8 @@ class ImportIGCGenes(object):
return gene_dict
def load_annotation_file_to_db_in_chunks(self, chunk_size=1000, test=False):
# Build mapping for different phylum and genus
self.build_mapping()
with open(self.annotation_file, 'r') as file:
while True:
chunk_genes = list(islice(file, chunk_size))
......
......@@ -116,12 +116,15 @@ class TestCleanGene(TestCase):
class TestSelectTaxonomy(APITestCase):
@classmethod
def setUpTestData(cls):
    """Create the names and the genus/phylum taxonomy fixtures once for the whole class."""
    cls.genus_name = 'Genus'
    cls.phylum_name = 'Phylum'
    cls.unknown_name = 'unknown'
    cls.genus = TaxonomyFactory(rank="genus", name=cls.genus_name)
    cls.phylum = TaxonomyFactory(rank="phylum", name=cls.phylum_name)
def setUp(self):
    """Create a fresh importer for each test and route its taxonomy API to the mock."""
    # The names and the genus/phylum fixtures are class-level attributes set in
    # setUpTestData; re-creating them here would insert a second 'Genus' and
    # 'Phylum' entry before every test.
    self.import_igc_genes = ImportIGCGenes('test', 'test')
    self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
    self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock
......@@ -184,3 +187,27 @@ class TestSelectTaxonomy(APITestCase):
}
tested_dict = self.import_igc_genes._select_taxonomy(gene_dict)
self.assertDictEqual(tested_dict, expected_dict)
class TestBuildTaxoMapping(APITestCase):
    """Check that ``ImportIGCGenes.build_mapping`` collects every genus and phylum entry."""

    @classmethod
    def setUpTestData(cls):
        """Create 200 genus and 20 phylum taxonomy entries for the class."""
        cls.genus_items = TaxonomyFactory.create_batch(200, rank='genus')
        cls.phylum_items = TaxonomyFactory.create_batch(20, rank='phylum')

    def setUp(self):
        """Create the importer and route its taxonomy API to the test-client mock."""
        self.import_igc_genes = ImportIGCGenes('test', 'test')
        self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
        self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock

    def test_build_mapping(self):
        """Both mappings must equal the name -> tax_id pairs of the created entries."""
        expected_genus_dict = {
            item.name: item.tax_id for item in self.genus_items
        }
        expected_phylum_dict = {
            item.name: item.tax_id for item in self.phylum_items
        }
        self.import_igc_genes.build_mapping()
        self.assertDictEqual(self.import_igc_genes.genus_mapping, expected_genus_dict)
        self.assertDictEqual(self.import_igc_genes.phylum_mapping, expected_phylum_dict)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment