Commit c9eb07d8 authored by Kenzo-Hugo Hillion
Browse files

[quick fix] allow creating gene with several KEGG from IGC

parent db3f33e9
......@@ -37,7 +37,7 @@ class IGCLineParser(object):
'cohort_origin': gene_info[4],
'taxo_phylum': gene_info[5],
'taxo_genus': gene_info[6],
'kegg_ko': gene_info[7],
'kegg_ko': gene_info[7].split(';'),
'eggnog': gene_info[8],
'sample_occurence_frequency': gene_info[9],
'individual_occurence_frequency': gene_info[10],
......
......@@ -31,7 +31,7 @@ class TestIGCLineParser(TestCase):
'cohort_origin': raw_data[4],
'taxo_phylum': raw_data[5],
'taxo_genus': raw_data[6],
'kegg_ko': raw_data[7],
'kegg_ko': [raw_data[7]],
'eggnog': raw_data[8],
'sample_occurence_frequency': raw_data[9],
'individual_occurence_frequency': raw_data[10],
......@@ -46,3 +46,40 @@ class TestIGCLineParser(TestCase):
raw_line = "This is a wrong line format, with; information and tab"
with self.assertRaises(Exception) as context: # noqa
IGCLineParser.gene(raw_line)
def test_multiple_functions(self):
    """A ';'-separated KEGG field must be parsed into a list of entries."""
    fields = [
        'gene_id',
        'gene_name',
        'length',
        'gene_completeness_status',
        'cohort_origin',
        'taxo_phylum',
        'taxo_genus',
        'kegg;kegg2',
        'eggnog',
        'sample_occurence_freq',
        'ind_occurence_freq',
        'kegg_functional_cat',
        'eggnog_functional_cat',
        'cohort_assembled',
    ]
    # Output keys, listed in the same order as the tab-separated columns.
    column_keys = (
        'igc_id',
        'gene_id',
        'length',
        'gene_completeness_status',
        'cohort_origin',
        'taxo_phylum',
        'taxo_genus',
        'kegg_ko',
        'eggnog',
        'sample_occurence_frequency',
        'individual_occurence_frequency',
        'kegg_functional_categories',
        'eggnog_functional_categories',
        'cohort_assembled',
    )
    wanted = dict(zip(column_keys, fields))
    # Only the KEGG column is expected to be split; every other column is
    # passed through verbatim.
    wanted['kegg_ko'] = ['kegg', 'kegg2']

    parsed = IGCLineParser.gene("\t".join(fields))

    self.assertDictEqual(parsed, wanted)
......@@ -82,7 +82,7 @@ class ImportIGCGenes(object):
def _clean_gene(self, gene_dict):
gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = [gene_dict.pop('kegg_ko')]
gene_dict['functions'] = gene_dict.pop('kegg_ko')
if self.skip_tax:
gene_dict.pop('taxonomy')
if self.skip_functions or 'unknown' in gene_dict['functions']:
......@@ -102,6 +102,7 @@ class ImportIGCGenes(object):
self.updated_genes += response.get('updated').get('count')
except HTTPError as http_error:
logging.warning("%s: %s; %s", http_error, http_error.response.json(), genes)
self.skipped_genes += len(genes)
self.processed_genes += len(chunk_genes)
logger.info("%s Genes processed so far...", self.processed_genes)
logger.info("[DONE] %s/%s Genes created.", self.created_genes, self.total_genes)
......
......@@ -36,7 +36,7 @@ class TestParseGene(TestCase):
expected_dict = {
'gene_id': 'gene_name',
'length': 'length',
'kegg_ko': 'kegg',
'kegg_ko': ['kegg'],
'taxo_phylum': 'taxo_phylum',
'taxo_genus': 'taxo_genus',
}
......@@ -76,7 +76,7 @@ class TestCleanGene(TestCase):
'gene_id': 'gene.01',
'length': 135,
'taxonomy': 'Taxo',
'kegg_ko': 'K00001'
'kegg_ko': ['K00001']
}
def test_clean_gene(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment