Skip to content
Snippets Groups Projects
Select Git revision
  • db4cb890321024ad64dc81f653bdc1940208a128
  • dev default
  • improve-source
  • improve-db-queries
  • master protected
5 results

test_import_igc_data.py

Blame
  • test_import_igc_data.py 10.80 KiB
    from unittest import TestCase
    
    from rest_framework.test import APITestCase
    
    from metagenedb.common.utils.mocks.metagenedb import (
        MetageneDBCatalogTaxonomyAPIMock,
        MetageneDBCatalogEggNogAPIMock,
        MetageneDBCatalogKeggOrthologyAPIMock
    )
    from metagenedb.apps.catalog.factory import (
        TaxonomyFactory,
        KeggOrthologyFactory,
        EggNogFactory
    )
    from scripts.populate_db.import_igc_data import ImportIGCGenes
    
    
    class TestParseGene(TestCase):
        """Exercise ``ImportIGCGenes._parse_gene`` on one synthetic tab-separated line.

        Every column of the line is a distinct placeholder string, so the expected
        dictionaries below show which column each returned key is read from.
        """

        def setUp(self):
            columns = (
                'gene_id',
                'gene_name',
                'length',
                'gene_completeness_status',
                'cohort_origin',
                'taxo_phylum',
                'taxo_genus',
                'kegg',
                'eggnog',
                'sample_occurence_freq',
                'ind_occurence_freq',
                'kegg_functional_cat',
                'eggnog_functional_cat',
                'cohort_assembled',
            )
            self.import_igc_genes = ImportIGCGenes('test', 'test_url', 'test_token')
            self.raw_line = "\t".join(columns)

        def _assert_parsed(self, expected, **parse_kwargs):
            """Parse the fixture line with *parse_kwargs* and compare the result to *expected*."""
            parsed = self.import_igc_genes._parse_gene(self.raw_line, **parse_kwargs)
            self.assertDictEqual(parsed, expected)

        def test_parse_gene_default_selected_keys(self):
            """
            Parse the line without passing ``selected_keys``.

            This test is expected to fail, and must be updated, whenever
            SELECTED_KEYS is changed.
            """
            self._assert_parsed({
                'gene_id': 'gene_name',
                'length': 'length',
                'kegg_ko': ['kegg'],
                'eggnog': ['eggnog'],
                'taxo_phylum': 'taxo_phylum',
                'taxo_genus': 'taxo_genus',
            })

        def test_parse_gene(self):
            """
            An explicit ``selected_keys`` list limits the result to those keys
            """
            self._assert_parsed(
                {'gene_id': 'gene_name', 'length': 'length'},
                selected_keys=['gene_id', 'length'],
            )

        def test_parse_gene_unknown_key(self):
            """
            A requested key that is not known should be ignored
            """
            self._assert_parsed(
                {'gene_id': 'gene_name', 'length': 'length'},
                selected_keys=['gene_id', 'length', 'secret_code'],
            )
    
    
    class TestCleanGene(TestCase):
        """Exercise ``ImportIGCGenes._clean_gene`` with its helper steps stubbed out.

        ``_select_taxonomy`` and ``_clean_functions`` are replaced on the instance
        by a pass-through callable, so the assertions only reflect what
        ``_clean_gene`` itself does to the gene dictionary.
        """

        def setUp(self):
            def passthrough(x):
                # Stub: hand the argument back untouched.
                return x

            importer = ImportIGCGenes('test', 'test_url', 'test_token')
            importer._select_taxonomy = passthrough
            importer._clean_functions = passthrough
            self.import_igc_genes = importer
            self.gene_dict = {
                'gene_id': 'gene.01',
                'length': 135,
                'kegg_ko': ['K00001'],
                'eggnog': ['COG1'],
            }

        def _assert_cleaned(self, gene_dict, expected):
            """Run ``_clean_gene`` on *gene_dict* and compare the result to *expected*."""
            cleaned = self.import_igc_genes._clean_gene(gene_dict)
            self.assertDictEqual(cleaned, expected)

        def test_clean_gene(self):
            """Both annotations are expected under ``functions``, each tagged with its source."""
            self._assert_cleaned(self.gene_dict, {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'length': 135,
                'functions': [
                    {'source': 'kegg', 'function_id': 'K00001'},
                    {'source': 'eggnog', 'function_id': 'COG1'},
                ],
            })

        def test_clean_gene_skip_functions(self):
            """With ``skip_functions`` set, no ``functions`` entry is expected."""
            self.import_igc_genes.skip_functions = True
            self._assert_cleaned(self.gene_dict, {
                'gene_id': 'gene-01',
                'gene_name': 'gene.01',
                'length': 135,
            })

        def test_unknown_kegg_ko(self):
            """An ``'unknown'`` KEGG entry is dropped while the EggNOG entry is kept."""
            self._assert_cleaned(
                {
                    'gene_id': 'gene.01',
                    'length': 135,
                    'kegg_ko': ['unknown'],
                    'eggnog': ['COG1'],
                },
                {
                    'gene_id': 'gene-01',
                    'gene_name': 'gene.01',
                    'functions': [{'function_id': 'COG1', 'source': 'eggnog'}],
                    'length': 135,
                },
            )

        def test_unknow_kegg_and_eggnog(self):
            """When both annotations are ``'unknown'``, no ``functions`` key is expected."""
            self._assert_cleaned(
                {
                    'gene_id': 'gene.01',
                    'length': 135,
                    'kegg_ko': ['unknown'],
                    'eggnog': ['unknown'],
                },
                {
                    'gene_id': 'gene-01',
                    'gene_name': 'gene.01',
                    'length': 135,
                },
            )
    
    
    class TestCleanFunctions(TestCase):
        """Exercise ``ImportIGCGenes._clean_functions`` against fixed sets of known IDs.

        The importer's ``metagenedb_eggnogs`` and ``metagenedb_keggs`` sets are
        assigned directly in ``setUp`` instead of being built from an API.
        """

        def setUp(self):
            importer = ImportIGCGenes('test', 'test_url', 'test_token')
            importer.metagenedb_eggnogs = {'COG1', 'COG2'}
            importer.metagenedb_keggs = {'K00001', 'K00002'}
            self.import_igc_genes = importer

        def test_clean_functions(self):
            """Function IDs present in the known sets are returned, in input order."""
            cleaned = self.import_igc_genes._clean_functions([
                {'function_id': 'K00001', 'source': 'kegg'},
                {'function_id': 'COG1', 'source': 'eggnog'},
            ])
            self.assertListEqual(cleaned, ['K00001', 'COG1'])

        def test_clean_functions_unknown_kegg(self):
            """A KEGG ID missing from ``metagenedb_keggs`` is left out of the result."""
            cleaned = self.import_igc_genes._clean_functions([
                {'function_id': 'K00301', 'source': 'kegg'},
                {'function_id': 'COG1', 'source': 'eggnog'},
            ])
            self.assertListEqual(cleaned, ['COG1'])
    
    
    class TestSelectTaxonomy(TestCase):
        """Exercise ``ImportIGCGenes._select_taxonomy`` with one-entry mappings.

        ``phylum_mapping`` and ``genus_mapping`` each hold a single name -> id
        pair; the tests vary the ``taxo_phylum`` / ``taxo_genus`` values of the
        gene and check which ``taxonomy`` id, if any, is expected in the result.
        """

        def setUp(self):
            self.unknown_name = 'unknown'
            self.genus_id = 'genus_1'
            self.genus_name = 'Genus1'
            self.phylum_id = 'phylum_1'
            self.phylum_name = 'Phylum1'
            importer = ImportIGCGenes('test', 'test_url', 'test_token')
            importer.phylum_mapping = {self.phylum_name: self.phylum_id}
            importer.genus_mapping = {self.genus_name: self.genus_id}
            self.import_igc_genes = importer

        def _assert_selected(self, phylum, genus, taxonomy=None):
            """Run ``_select_taxonomy`` on a gene carrying *phylum* and *genus*.

            The expected result holds ``gene_id`` and ``length`` only, plus a
            ``taxonomy`` entry when *taxonomy* is given.
            """
            gene = {
                'gene_id': 'gene',
                'length': 135,
                'taxo_phylum': phylum,
                'taxo_genus': genus,
            }
            expected = {'gene_id': 'gene', 'length': 135}
            if taxonomy is not None:
                expected['taxonomy'] = taxonomy
            selected = self.import_igc_genes._select_taxonomy(gene)
            self.assertDictEqual(selected, expected)

        def test_genus_only(self):
            """A mapped genus with an unknown phylum gives the genus id."""
            self._assert_selected(self.unknown_name, self.genus_name, self.genus_id)

        def test_genus_not_in_mapping(self):
            """A genus absent from the mapping gives no taxonomy entry."""
            self._assert_selected(self.unknown_name, "Genus2")

        def test_phylum_only(self):
            """A mapped phylum with an unknown genus gives the phylum id."""
            self._assert_selected(self.phylum_name, self.unknown_name, self.phylum_id)

        def test_phylum_not_in_mapping(self):
            """A phylum absent from the mapping gives no taxonomy entry."""
            self._assert_selected("Phylum2", self.unknown_name)

        def test_genus_phylum(self):
            """When both are mapped, the genus id is the one expected."""
            self._assert_selected(self.phylum_name, self.genus_name, self.genus_id)

        def test_both_unknown(self):
            """Unknown phylum and genus give no taxonomy entry."""
            self._assert_selected(self.unknown_name, self.unknown_name)
    
    
    class TestBuildTaxoMapping(APITestCase):
        """Check ``ImportIGCGenes.build_mapping`` against factory-created taxonomy rows.

        The importer's ``metagenedb_taxonomy_api`` is swapped for a mock built on
        the test client. 200 genus rows are created while ``build_mapping`` is
        called with ``page_size=100``, so presumably more than one page has to be
        fetched -- confirm against the mock implementation.
        """

        @classmethod
        def setUpTestData(cls):
            cls.genus_items = TaxonomyFactory.create_batch(200, rank='genus')
            cls.phylum_items = TaxonomyFactory.create_batch(20, rank='phylum')

        def setUp(self):
            importer = ImportIGCGenes('test', 'test_url', 'test_token')
            self.api_mock = MetageneDBCatalogTaxonomyAPIMock(self.client)
            importer.metagenedb_taxonomy_api = self.api_mock
            self.import_igc_genes = importer

        @staticmethod
        def _name_to_tax_id(items):
            """Return a ``{name: tax_id}`` dict for the given taxonomy items."""
            return dict((entry.name, entry.tax_id) for entry in items)

        def test_build_mapping(self):
            """Genus and phylum mappings should each equal name -> tax_id of the created rows."""
            genus_expected = self._name_to_tax_id(self.genus_items)
            phylum_expected = self._name_to_tax_id(self.phylum_items)
            importer = self.import_igc_genes
            importer.build_mapping(page_size=100)
            self.assertDictEqual(importer.genus_mapping, genus_expected)
            self.assertDictEqual(importer.phylum_mapping, phylum_expected)
    
    
    class TestBuildBuildFunctionCatalog(APITestCase):
        """Check ``ImportIGCGenes.build_function_mappings`` against factory-created functions.

        The importer's KEGG and EggNOG API clients are swapped for mocks built on
        the test client; 100 rows of each kind are created beforehand.
        """

        @classmethod
        def setUpTestData(cls):
            cls.keggs = KeggOrthologyFactory.create_batch(100)
            cls.eggnogs = EggNogFactory.create_batch(100)

        def setUp(self):
            importer = ImportIGCGenes('test', 'test_url', 'test_token')
            self.import_igc_genes = importer
            self.kegg_api_mock = MetageneDBCatalogKeggOrthologyAPIMock(self.client)
            importer.metagenedb_kegg_api = self.kegg_api_mock
            self.eggnog_api_mock = MetageneDBCatalogEggNogAPIMock(self.client)
            importer.metagenedb_eggnog_api = self.eggnog_api_mock

        def test_build_catalog(self):
            """Both ID sets on the importer should match the ``function_id`` of the created rows."""
            kegg_ids = {entry.function_id for entry in self.keggs}
            eggnog_ids = {entry.function_id for entry in self.eggnogs}
            importer = self.import_igc_genes
            importer.build_function_mappings(page_size=100)
            self.assertSetEqual(importer.metagenedb_keggs, kegg_ids)
            self.assertSetEqual(importer.metagenedb_eggnogs, eggnog_ids)