Select Git revision
test_import_igc_data.py
test_import_igc_data.py 10.80 KiB
from unittest import TestCase
from rest_framework.test import APITestCase
from metagenedb.common.utils.mocks.metagenedb import (
MetageneDBCatalogTaxonomyAPIMock,
MetageneDBCatalogEggNogAPIMock,
MetageneDBCatalogKeggOrthologyAPIMock
)
from metagenedb.apps.catalog.factory import (
TaxonomyFactory,
KeggOrthologyFactory,
EggNogFactory
)
from scripts.populate_db.import_igc_data import ImportIGCGenes
class TestParseGene(TestCase):
    """
    Tests for ``ImportIGCGenes._parse_gene`` on a single tab-separated line.
    """

    def setUp(self):
        # Every column carries its own header name as value, so the parsed
        # dict shows directly which column ended up under which key.
        columns = (
            'gene_id',
            'gene_name',
            'length',
            'gene_completeness_status',
            'cohort_origin',
            'taxo_phylum',
            'taxo_genus',
            'kegg',
            'eggnog',
            'sample_occurence_freq',
            'ind_occurence_freq',
            'kegg_functional_cat',
            'eggnog_functional_cat',
            'cohort_assembled',
        )
        self.raw_line = "\t".join(columns)
        self.import_igc_genes = ImportIGCGenes('test', 'test_url', 'test_token')

    def test_parse_gene_default_selected_keys(self):
        """
        Parse the line without passing ``selected_keys``.

        This test is expected to fail, and needs to be updated, whenever
        SELECTED_KEYS are changed.
        """
        parsed = self.import_igc_genes._parse_gene(self.raw_line)
        self.assertDictEqual(
            parsed,
            {
                'gene_id': 'gene_name',
                'length': 'length',
                'kegg_ko': ['kegg'],
                'eggnog': ['eggnog'],
                'taxo_phylum': 'taxo_phylum',
                'taxo_genus': 'taxo_genus',
            }
        )

    def test_parse_gene(self):
        """
        Passing explicit ``selected_keys`` should yield a dict with only those keys
        """
        wanted = ['gene_id', 'length']
        parsed = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=wanted)
        self.assertDictEqual(
            parsed,
            {
                'gene_id': 'gene_name',
                'length': 'length',
            }
        )

    def test_parse_gene_unknown_key(self):
        """
        Unknown key should be ignored
        """
        wanted = ['gene_id', 'length', 'secret_code']
        parsed = self.import_igc_genes._parse_gene(self.raw_line, selected_keys=wanted)
        self.assertDictEqual(
            parsed,
            {
                'gene_id': 'gene_name',
                'length': 'length',
            }
        )
class TestCleanGene(TestCase):
    """
    Tests for ``ImportIGCGenes._clean_gene`` with its two helper steps stubbed.
    """

    def setUp(self):
        def identity(x):
            return x

        importer = ImportIGCGenes('test', 'test_url', 'test_token')
        # Replace both helpers with an identity function so that they hand
        # back whatever they receive, unchanged.
        importer._select_taxonomy = identity
        importer._clean_functions = identity
        self.import_igc_genes = importer
        self.gene_dict = {
            'gene_id': 'gene.01',
            'length': 135,
            'kegg_ko': ['K00001'],
            'eggnog': ['COG1'],
        }

    def test_clean_gene(self):
        """
        Known KEGG and EggNOG ids are expected under a single 'functions' list
        """
        cleaned = self.import_igc_genes._clean_gene(self.gene_dict)
        self.assertDictEqual(
            cleaned,
            {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'length': 135,
                'functions': [
                    {'source': 'kegg', 'function_id': 'K00001'},
                    {'source': 'eggnog', 'function_id': 'COG1'},
                ],
            }
        )

    def test_clean_gene_skip_functions(self):
        """
        With ``skip_functions`` set, no 'functions' entry is expected
        """
        self.import_igc_genes.skip_functions = True
        cleaned = self.import_igc_genes._clean_gene(self.gene_dict)
        self.assertDictEqual(
            cleaned,
            {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'length': 135,
            }
        )

    def test_unknown_kegg_ko(self):
        """
        An 'unknown' KEGG entry is expected to be left out of 'functions'
        """
        raw_gene = {
            'gene_id': 'gene.01',
            'length': 135,
            'kegg_ko': ['unknown'],
            'eggnog': ['COG1'],
        }
        cleaned = self.import_igc_genes._clean_gene(raw_gene)
        self.assertDictEqual(
            cleaned,
            {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'functions': [{'function_id': 'COG1', 'source': 'eggnog'}],
                'length': 135,
            }
        )

    def test_unknow_kegg_and_eggnog(self):
        """
        When both annotations are 'unknown', no 'functions' entry is expected
        """
        raw_gene = {
            'gene_id': 'gene.01',
            'length': 135,
            'kegg_ko': ['unknown'],
            'eggnog': ['unknown'],
        }
        cleaned = self.import_igc_genes._clean_gene(raw_gene)
        self.assertDictEqual(
            cleaned,
            {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'length': 135,
            }
        )
class TestCleanFunctions(TestCase):
    """
    Tests for ``ImportIGCGenes._clean_functions`` against fixed sets of known ids.
    """

    def setUp(self):
        importer = ImportIGCGenes('test', 'test_url', 'test_token')
        # Function ids the importer should treat as known for these tests.
        importer.metagenedb_eggnogs = {'COG1', 'COG2'}
        importer.metagenedb_keggs = {'K00001', 'K00002'}
        self.import_igc_genes = importer

    def test_clean_functions(self):
        """
        Ids present in the known sets are expected back as a flat list of ids
        """
        annotations = [
            {'function_id': 'K00001', 'source': 'kegg'},
            {'function_id': 'COG1', 'source': 'eggnog'},
        ]
        cleaned = self.import_igc_genes._clean_functions(annotations)
        self.assertListEqual(cleaned, ['K00001', 'COG1'])

    def test_clean_functions_unknown_kegg(self):
        """
        A KEGG id missing from the known set is expected to be dropped
        """
        annotations = [
            {'function_id': 'K00301', 'source': 'kegg'},
            {'function_id': 'COG1', 'source': 'eggnog'},
        ]
        cleaned = self.import_igc_genes._clean_functions(annotations)
        self.assertListEqual(cleaned, ['COG1'])
class TestSelectTaxonomy(TestCase):
    """
    Tests for ``ImportIGCGenes._select_taxonomy`` with hand-built name mappings.
    """

    def setUp(self):
        self.unknown_name = 'unknown'
        self.genus_id = 'genus_1'
        self.genus_name = 'Genus1'
        self.phylum_id = 'phylum_1'
        self.phylum_name = 'Phylum1'
        importer = ImportIGCGenes('test', 'test_url', 'test_token')
        # One known phylum and one known genus; any other name is unmapped.
        importer.phylum_mapping = {self.phylum_name: self.phylum_id}
        importer.genus_mapping = {self.genus_name: self.genus_id}
        self.import_igc_genes = importer

    def _assert_selected(self, phylum, genus, taxonomy=None):
        """
        Run ``_select_taxonomy`` on a gene with the given names and compare.

        The expected dict holds 'gene_id' and 'length', plus 'taxonomy' only
        when ``taxonomy`` is given.
        """
        expected = {'gene_id': 'gene', 'length': 135}
        if taxonomy is not None:
            expected['taxonomy'] = taxonomy
        gene = {
            'gene_id': 'gene',
            'length': 135,
            'taxo_phylum': phylum,
            'taxo_genus': genus,
        }
        selected = self.import_igc_genes._select_taxonomy(gene)
        self.assertDictEqual(selected, expected)

    def test_genus_only(self):
        """
        Known genus with unknown phylum: the genus id is expected
        """
        self._assert_selected(self.unknown_name, self.genus_name, taxonomy=self.genus_id)

    def test_genus_not_in_mapping(self):
        """
        Genus name missing from the mapping: no 'taxonomy' key is expected
        """
        self._assert_selected(self.unknown_name, "Genus2")

    def test_phylum_only(self):
        """
        Known phylum with unknown genus: the phylum id is expected
        """
        self._assert_selected(self.phylum_name, self.unknown_name, taxonomy=self.phylum_id)

    def test_phylum_not_in_mapping(self):
        """
        Phylum name missing from the mapping: no 'taxonomy' key is expected
        """
        self._assert_selected("Phylum2", self.unknown_name)

    def test_genus_phylum(self):
        """
        Both known: the genus id is expected rather than the phylum id
        """
        self._assert_selected(self.phylum_name, self.genus_name, taxonomy=self.genus_id)

    def test_both_unknown(self):
        """
        Both unknown: no 'taxonomy' key is expected
        """
        self._assert_selected(self.unknown_name, self.unknown_name)
class TestBuildTaxoMapping(APITestCase):
    """
    Tests for ``ImportIGCGenes.build_mapping`` against the mocked taxonomy API.
    """

    @classmethod
    def setUpTestData(cls):
        # NOTE(review): 200 genus items with page_size=100 below presumably
        # forces more than one page to be fetched - confirm against the mock.
        cls.genus_items = TaxonomyFactory.create_batch(200, rank='genus')
        cls.phylum_items = TaxonomyFactory.create_batch(20, rank='phylum')

    def setUp(self):
        importer = ImportIGCGenes('test', 'test_url', 'test_token')
        self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
        # Swap in the mock, which is built on the test client.
        importer.metagenedb_taxonomy_api = self.api_mock
        self.import_igc_genes = importer

    @staticmethod
    def _name_to_tax_id(items):
        """
        Map each item's ``name`` to its ``tax_id``
        """
        return {entry.name: entry.tax_id for entry in items}

    def test_build_mapping(self):
        """
        Genus and phylum mappings are expected to be name -> tax_id of the created items
        """
        genus_expected = self._name_to_tax_id(self.genus_items)
        phylum_expected = self._name_to_tax_id(self.phylum_items)
        self.import_igc_genes.build_mapping(page_size=100)
        self.assertDictEqual(self.import_igc_genes.genus_mapping, genus_expected)
        self.assertDictEqual(self.import_igc_genes.phylum_mapping, phylum_expected)
class TestBuildBuildFunctionCatalog(APITestCase):
    """
    Tests for ``ImportIGCGenes.build_function_mappings`` against the mocked function APIs.
    """

    @classmethod
    def setUpTestData(cls):
        cls.keggs = KeggOrthologyFactory.create_batch(100)
        cls.eggnogs = EggNogFactory.create_batch(100)

    def setUp(self):
        importer = ImportIGCGenes('test', 'test_url', 'test_token')
        self.kegg_api_mock = MetageneDBCatalogKeggOrthologyAPIMock(self.client)
        self.eggnog_api_mock = MetageneDBCatalogEggNogAPIMock(self.client)
        # Swap in the mocks, which are built on the test client.
        importer.metagenedb_kegg_api = self.kegg_api_mock
        importer.metagenedb_eggnog_api = self.eggnog_api_mock
        self.import_igc_genes = importer

    @staticmethod
    def _function_ids(items):
        """
        Collect the ``function_id`` of every item into a set
        """
        return {entry.function_id for entry in items}

    def test_build_catalog(self):
        """
        Both catalogs are expected to hold exactly the ids of the created items
        """
        kegg_expected = self._function_ids(self.keggs)
        eggnog_expected = self._function_ids(self.eggnogs)
        self.import_igc_genes.build_function_mappings(page_size=100)
        self.assertSetEqual(self.import_igc_genes.metagenedb_keggs, kegg_expected)
        self.assertSetEqual(self.import_igc_genes.metagenedb_eggnogs, eggnog_expected)