Skip to content
Snippets Groups Projects
Commit 220b598c authored by Kenzo-Hugo Hillion :recycle:
Browse files

modify handling of taxonomy

parent 69643d1f
Branches
Tags
2 merge requests: !59 Prod, !18 Resolve "Create backend service to perform request to external APIs"
...@@ -65,9 +65,7 @@ class ImportIGCGenes(object): ...@@ -65,9 +65,7 @@ class ImportIGCGenes(object):
if len(resp_dict['results']) > 1: if len(resp_dict['results']) > 1:
logger.warning(f"More than 1 result found for phylum {phylum}. First result is kept.") logger.warning(f"More than 1 result found for phylum {phylum}. First result is kept.")
if resp_dict.get('count', 0) > 0: if resp_dict.get('count', 0) > 0:
gene_dict.update( gene_dict.update({'taxonomy': resp_dict['results'][0]['tax_id']})
{'taxonomy': resp_dict['results'][0]['tax_id']}
)
return gene_dict return gene_dict
def _parse_gene(self, raw_line, selected_keys=SELECTED_KEYS): def _parse_gene(self, raw_line, selected_keys=SELECTED_KEYS):
...@@ -83,8 +81,10 @@ class ImportIGCGenes(object): ...@@ -83,8 +81,10 @@ class ImportIGCGenes(object):
gene_dict['gene_name'] = gene_dict['gene_id'] gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id']) gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = gene_dict.pop('kegg_ko') gene_dict['functions'] = gene_dict.pop('kegg_ko')
if self.skip_tax: if gene_dict.get('taxonomy', None) == 'unknown' or self.skip_tax:
gene_dict.pop('taxonomy') gene_dict.pop('taxonomy')
else:
gene_dict = self._select_taxonomy(gene_dict)
if self.skip_functions or 'unknown' in gene_dict['functions']: if self.skip_functions or 'unknown' in gene_dict['functions']:
gene_dict.pop('functions') gene_dict.pop('functions')
return gene_dict return gene_dict
...@@ -95,7 +95,7 @@ class ImportIGCGenes(object): ...@@ -95,7 +95,7 @@ class ImportIGCGenes(object):
chunk_genes = list(islice(file, chunk_size)) chunk_genes = list(islice(file, chunk_size))
if not chunk_genes: if not chunk_genes:
break break
genes = [self._clean_gene(self._select_taxonomy(self._parse_gene(i))) for i in chunk_genes] genes = [self._clean_gene(self._parse_gene(i)) for i in chunk_genes]
try: try:
response = self.metagenedb_gene_api.put(genes) response = self.metagenedb_gene_api.put(genes)
self.created_genes += response.get('created').get('count') self.created_genes += response.get('created').get('count')
......
...@@ -72,6 +72,7 @@ class TestCleanGene(TestCase): ...@@ -72,6 +72,7 @@ class TestCleanGene(TestCase):
def setUp(self): def setUp(self):
self.import_igc_genes = ImportIGCGenes('test', 'test') self.import_igc_genes = ImportIGCGenes('test', 'test')
self.import_igc_genes._select_taxonomy = lambda x: x # Mock to return same dict
self.gene_dict = { self.gene_dict = {
'gene_id': 'gene.01', 'gene_id': 'gene.01',
'length': 135, 'length': 135,
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment