Commit 5635b146 authored by Kenzo-Hugo Hillion
Browse files

Update way of getting taxonomy while importing IGC genes

parent 04881127
Pipeline #18611 passed with stages
in 2 minutes and 15 seconds
from factory import DjangoModelFactory, fuzzy, Faker
from factory import DjangoModelFactory, fuzzy
from faker import Factory
from metagenedb.apps.catalog import models
......
......@@ -74,17 +74,17 @@ class ImportIGCGenes(object):
genus = gene_dict.pop(self.GENUS_COL)
if self.skip_tax:
return gene_dict
resp_dict = {}
taxonomy_id = None
if genus != unknown_val:
resp_dict = self.metagenedb_taxonomy_api.get_all(params={'name': genus, 'rank': 'genus'})
if len(resp_dict['results']) > 1:
logger.warning(f"More than 1 result found for genus {genus}. First result is kept.")
taxonomy_id = self.genus_mapping.get(genus, None)
if taxonomy_id is None:
logger.warning("No tax_id found for genus %s" % genus)
elif phylum != unknown_val:
resp_dict = self.metagenedb_taxonomy_api.get_all(params={'name': phylum, 'rank': 'phylum'})
if len(resp_dict['results']) > 1:
logger.warning(f"More than 1 result found for phylum {phylum}. First result is kept.")
if resp_dict.get('count', 0) > 0:
gene_dict.update({'taxonomy': resp_dict['results'][0]['tax_id']})
taxonomy_id = self.phylum_mapping.get(phylum, None)
if taxonomy_id is None:
logger.warning("No tax_id found for phylum %s" % genus)
if taxonomy_id is not None:
gene_dict.update({'taxonomy': taxonomy_id})
return gene_dict
def _parse_gene(self, raw_line, selected_keys=SELECTED_KEYS):
......
......@@ -114,20 +114,21 @@ class TestCleanGene(TestCase):
self.assertDictEqual(test_gene_dict, expected_gene_dict)
class TestSelectTaxonomy(APITestCase):
@classmethod
def setUpTestData(cls):
cls.genus_name = 'Genus'
cls.phylum_name = 'Phylum'
cls.unknown_name = 'unknown'
cls.genus = TaxonomyFactory(rank="genus", name=cls.genus_name)
cls.phylum = TaxonomyFactory(rank="phylum", name=cls.phylum_name)
class TestSelectTaxonomy(TestCase):
# Per-test fixture: builds an ImportIGCGenes instance and assigns small
# genus/phylum name -> id mappings on it.
def setUp(self):
# Placeholder name the tests pass for a rank that has no assignment.
self.unknown_name = 'unknown'
# Single known genus and phylum, each with the id stored in the mappings below.
self.genus_id = 'genus_1'
self.genus_name = 'Genus1'
self.phylum_id = 'phylum_1'
self.phylum_name = 'Phylum1'
self.import_igc_genes = ImportIGCGenes('test', 'test')
# NOTE(review): this text comes from a rendered diff; the two API-mock lines
# below may belong to the removed side of the change -- confirm against the
# committed file before relying on them.
self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
self.import_igc_genes.metagenedb_taxonomy_api = self.api_mock
# Mappings are set directly on the instance under test.
self.import_igc_genes.phylum_mapping = {
self.phylum_name: self.phylum_id
}
self.import_igc_genes.genus_mapping = {
self.genus_name: self.genus_id
}
def test_genus_only(self):
gene_dict = {
......@@ -139,7 +140,21 @@ class TestSelectTaxonomy(APITestCase):
expected_dict = {
'gene_id': 'gene',
'length': 135,
'taxonomy': str(self.genus.tax_id)
'taxonomy': self.genus_id
}
tested_dict = self.import_igc_genes._select_taxonomy(gene_dict)
self.assertDictEqual(tested_dict, expected_dict)
def test_genus_not_in_mapping(self):
    """A genus missing from the mapping must not produce a 'taxonomy' entry."""
    # Fields expected to come back unchanged from taxonomy selection.
    kept_fields = {'gene_id': 'gene', 'length': 135}
    # "Genus2" is not a key of the genus mapping configured in setUp.
    raw_gene = dict(kept_fields)
    raw_gene['taxo_phylum'] = self.unknown_name
    raw_gene['taxo_genus'] = "Genus2"
    outcome = self.import_igc_genes._select_taxonomy(raw_gene)
    self.assertDictEqual(outcome, kept_fields)
......@@ -154,7 +169,21 @@ class TestSelectTaxonomy(APITestCase):
expected_dict = {
'gene_id': 'gene',
'length': 135,
'taxonomy': str(self.phylum.tax_id)
'taxonomy': self.phylum_id
}
tested_dict = self.import_igc_genes._select_taxonomy(gene_dict)
self.assertDictEqual(tested_dict, expected_dict)
def test_phylum_not_in_mapping(self):
    """A phylum missing from the mapping must not produce a 'taxonomy' entry."""
    # Fields expected to come back unchanged from taxonomy selection.
    kept_fields = {'gene_id': 'gene', 'length': 135}
    # "Phylum2" is not a key of the phylum mapping configured in setUp.
    raw_gene = dict(kept_fields)
    raw_gene['taxo_phylum'] = "Phylum2"
    raw_gene['taxo_genus'] = self.unknown_name
    outcome = self.import_igc_genes._select_taxonomy(raw_gene)
    self.assertDictEqual(outcome, kept_fields)
......@@ -169,7 +198,7 @@ class TestSelectTaxonomy(APITestCase):
expected_dict = {
'gene_id': 'gene',
'length': 135,
'taxonomy': str(self.genus.tax_id)
'taxonomy': self.genus_id
}
tested_dict = self.import_igc_genes._select_taxonomy(gene_dict)
self.assertDictEqual(tested_dict, expected_dict)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment