# test_igc.py -- unit tests for IGCLineParser
from unittest import TestCase

from metagenedb.common.utils.parsers import IGCLineParser


class TestIGCLineParser(TestCase):
    """Check the dictionary ``IGCLineParser.gene`` builds from one raw line.

    Each fixture is a list of 14 placeholder strings joined with tabs. The
    expected dictionary is expressed in terms of those same raw values, so
    the tests pin the column-to-key mapping rather than any real data.
    """

    @staticmethod
    def _raw_fields(kegg='kegg', eggnog='eggnog'):
        """Return the 14 placeholder column values of one line, in order.

        Args:
            kegg: Raw text placed in the KEGG column (index 7).
            eggnog: Raw text placed in the eggNOG column (index 8).

        Returns:
            A new list of 14 strings, one per column.
        """
        # NOTE: the placeholder text does not always match the output key it
        # ends up under (column 0 'gene_id' is expected under 'igc_id',
        # column 1 'gene_name' under 'gene_id').
        return [
            'gene_id',
            'gene_name',
            'length',
            'gene_completeness_status',
            'cohort_origin',
            'taxo_phylum',
            'taxo_genus',
            kegg,
            eggnog,
            'sample_occurence_freq',
            'ind_occurence_freq',
            'kegg_functional_cat',
            'eggnog_functional_cat',
            'cohort_assembled',
        ]

    @staticmethod
    def _expected_gene(raw_data, kegg_ko, eggnog):
        """Return the dictionary the tests expect for ``raw_data``.

        Args:
            raw_data: The 14 raw column values that were joined into the line.
            kegg_ko: Expected list stored under ``'kegg_ko'``.
            eggnog: Expected list stored under ``'eggnog'``.

        The two list-valued entries are passed in explicitly so that the
        expectation does not re-implement the parser's own splitting.
        """
        return {
            'igc_id': raw_data[0],
            'gene_id': raw_data[1],
            'length': raw_data[2],
            'gene_completeness_status': raw_data[3],
            'cohort_origin': raw_data[4],
            'taxo_phylum': raw_data[5],
            'taxo_genus': raw_data[6],
            'kegg_ko': kegg_ko,
            'eggnog': eggnog,
            'sample_occurence_frequency': raw_data[9],
            'individual_occurence_frequency': raw_data[10],
            'kegg_functional_categories': raw_data[11],
            'eggnog_functional_categories': raw_data[12],
            'cohort_assembled': raw_data[13],
        }

    def test_gene(self):
        """Single KEGG/eggNOG values are expected as one-element lists."""
        raw_data = self._raw_fields()
        raw_line = "\t".join(raw_data)
        expected_dict = self._expected_gene(
            raw_data,
            kegg_ko=[raw_data[7]],
            eggnog=[raw_data[8]],
        )
        test_dict = IGCLineParser.gene(raw_line)
        self.assertDictEqual(test_dict, expected_dict)

    def test_gene_wrong_format(self):
        """A line that does not follow the expected layout must raise."""
        raw_line = "This is a wrong line format, with; information   and tab"
        # The concrete exception type is decided by the parser, so the check
        # stays as broad as it was; only the unused ``as`` binding is dropped.
        with self.assertRaises(Exception):
            IGCLineParser.gene(raw_line)

    def test_multiple_functions(self):
        """';'-separated KEGG/eggNOG values are expected as multi-item lists."""
        raw_data = self._raw_fields(kegg='kegg;kegg2', eggnog='eggnog1;eggnog2')
        raw_line = "\t".join(raw_data)
        expected_dict = self._expected_gene(
            raw_data,
            kegg_ko=['kegg', 'kegg2'],
            eggnog=['eggnog1', 'eggnog2'],
        )
        test_dict = IGCLineParser.gene(raw_line)
        self.assertDictEqual(test_dict, expected_dict)