Skip to content
Snippets Groups Projects
Commit c9eb07d8 authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion :recycle:
Browse files

[quick fix] allow creating gene with several KEGG from IGC

parent db3f33e9
No related branches found
No related tags found
2 merge requests: !59 Prod, !18 Resolve "Create backend service to perform request to external APIs"
...@@ -37,7 +37,7 @@ class IGCLineParser(object): ...@@ -37,7 +37,7 @@ class IGCLineParser(object):
'cohort_origin': gene_info[4], 'cohort_origin': gene_info[4],
'taxo_phylum': gene_info[5], 'taxo_phylum': gene_info[5],
'taxo_genus': gene_info[6], 'taxo_genus': gene_info[6],
'kegg_ko': gene_info[7], 'kegg_ko': gene_info[7].split(';'),
'eggnog': gene_info[8], 'eggnog': gene_info[8],
'sample_occurence_frequency': gene_info[9], 'sample_occurence_frequency': gene_info[9],
'individual_occurence_frequency': gene_info[10], 'individual_occurence_frequency': gene_info[10],
......
...@@ -31,7 +31,7 @@ class TestIGCLineParser(TestCase): ...@@ -31,7 +31,7 @@ class TestIGCLineParser(TestCase):
'cohort_origin': raw_data[4], 'cohort_origin': raw_data[4],
'taxo_phylum': raw_data[5], 'taxo_phylum': raw_data[5],
'taxo_genus': raw_data[6], 'taxo_genus': raw_data[6],
'kegg_ko': raw_data[7], 'kegg_ko': [raw_data[7]],
'eggnog': raw_data[8], 'eggnog': raw_data[8],
'sample_occurence_frequency': raw_data[9], 'sample_occurence_frequency': raw_data[9],
'individual_occurence_frequency': raw_data[10], 'individual_occurence_frequency': raw_data[10],
...@@ -46,3 +46,40 @@ class TestIGCLineParser(TestCase): ...@@ -46,3 +46,40 @@ class TestIGCLineParser(TestCase):
raw_line = "This is a wrong line format, with; information and tab" raw_line = "This is a wrong line format, with; information and tab"
with self.assertRaises(Exception) as context: # noqa with self.assertRaises(Exception) as context: # noqa
IGCLineParser.gene(raw_line) IGCLineParser.gene(raw_line)
def test_multiple_functions(self):
    # A KEGG column holding several ';'-separated entries must come back
    # from the parser as a list with one item per entry.
    columns = (
        'gene_id',
        'gene_name',
        'length',
        'gene_completeness_status',
        'cohort_origin',
        'taxo_phylum',
        'taxo_genus',
        'kegg;kegg2',
        'eggnog',
        'sample_occurence_freq',
        'ind_occurence_freq',
        'kegg_functional_cat',
        'eggnog_functional_cat',
        'cohort_assembled',
    )
    # Output keys, listed in the same order as the input columns above.
    output_keys = (
        'igc_id',
        'gene_id',
        'length',
        'gene_completeness_status',
        'cohort_origin',
        'taxo_phylum',
        'taxo_genus',
        'kegg_ko',
        'eggnog',
        'sample_occurence_frequency',
        'individual_occurence_frequency',
        'kegg_functional_categories',
        'eggnog_functional_categories',
        'cohort_assembled',
    )
    # Every column is expected verbatim, except the KEGG one which is split.
    expected = dict(zip(output_keys, columns))
    expected['kegg_ko'] = ['kegg', 'kegg2']
    parsed = IGCLineParser.gene("\t".join(columns))
    self.assertDictEqual(parsed, expected)
...@@ -82,7 +82,7 @@ class ImportIGCGenes(object): ...@@ -82,7 +82,7 @@ class ImportIGCGenes(object):
def _clean_gene(self, gene_dict): def _clean_gene(self, gene_dict):
gene_dict['gene_name'] = gene_dict['gene_id'] gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id']) gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = [gene_dict.pop('kegg_ko')] gene_dict['functions'] = gene_dict.pop('kegg_ko')
if self.skip_tax: if self.skip_tax:
gene_dict.pop('taxonomy') gene_dict.pop('taxonomy')
if self.skip_functions or 'unknown' in gene_dict['functions']: if self.skip_functions or 'unknown' in gene_dict['functions']:
...@@ -102,6 +102,7 @@ class ImportIGCGenes(object): ...@@ -102,6 +102,7 @@ class ImportIGCGenes(object):
self.updated_genes += response.get('updated').get('count') self.updated_genes += response.get('updated').get('count')
except HTTPError as http_error: except HTTPError as http_error:
logging.warning("%s: %s; %s", http_error, http_error.response.json(), genes) logging.warning("%s: %s; %s", http_error, http_error.response.json(), genes)
self.skipped_genes += len(genes)
self.processed_genes += len(chunk_genes) self.processed_genes += len(chunk_genes)
logger.info("%s Genes processed so far...", self.processed_genes) logger.info("%s Genes processed so far...", self.processed_genes)
logger.info("[DONE] %s/%s Genes created.", self.created_genes, self.total_genes) logger.info("[DONE] %s/%s Genes created.", self.created_genes, self.total_genes)
......
...@@ -36,7 +36,7 @@ class TestParseGene(TestCase): ...@@ -36,7 +36,7 @@ class TestParseGene(TestCase):
expected_dict = { expected_dict = {
'gene_id': 'gene_name', 'gene_id': 'gene_name',
'length': 'length', 'length': 'length',
'kegg_ko': 'kegg', 'kegg_ko': ['kegg'],
'taxo_phylum': 'taxo_phylum', 'taxo_phylum': 'taxo_phylum',
'taxo_genus': 'taxo_genus', 'taxo_genus': 'taxo_genus',
} }
...@@ -76,7 +76,7 @@ class TestCleanGene(TestCase): ...@@ -76,7 +76,7 @@ class TestCleanGene(TestCase):
'gene_id': 'gene.01', 'gene_id': 'gene.01',
'length': 135, 'length': 135,
'taxonomy': 'Taxo', 'taxonomy': 'Taxo',
'kegg_ko': 'K00001' 'kegg_ko': ['K00001']
} }
def test_clean_gene(self): def test_clean_gene(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment