Commit 220b598c authored by Kenzo-Hugo Hillion
Browse files

modify handling of taxonomy

parent 69643d1f
......@@ -65,9 +65,7 @@ class ImportIGCGenes(object):
if len(resp_dict['results']) > 1:
logger.warning(f"More than 1 result found for phylum {phylum}. First result is kept.")
if resp_dict.get('count', 0) > 0:
gene_dict.update(
{'taxonomy': resp_dict['results'][0]['tax_id']}
)
gene_dict.update({'taxonomy': resp_dict['results'][0]['tax_id']})
return gene_dict
def _parse_gene(self, raw_line, selected_keys=SELECTED_KEYS):
......@@ -83,8 +81,10 @@ class ImportIGCGenes(object):
gene_dict['gene_name'] = gene_dict['gene_id']
gene_dict['gene_id'] = slugify(gene_dict['gene_id'])
gene_dict['functions'] = gene_dict.pop('kegg_ko')
if self.skip_tax:
if gene_dict.get('taxonomy', None) == 'unknown' or self.skip_tax:
gene_dict.pop('taxonomy')
else:
gene_dict = self._select_taxonomy(gene_dict)
if self.skip_functions or 'unknown' in gene_dict['functions']:
gene_dict.pop('functions')
return gene_dict
......@@ -95,7 +95,7 @@ class ImportIGCGenes(object):
chunk_genes = list(islice(file, chunk_size))
if not chunk_genes:
break
genes = [self._clean_gene(self._select_taxonomy(self._parse_gene(i))) for i in chunk_genes]
genes = [self._clean_gene(self._parse_gene(i)) for i in chunk_genes]
try:
response = self.metagenedb_gene_api.put(genes)
self.created_genes += response.get('created').get('count')
......
......@@ -72,6 +72,7 @@ class TestCleanGene(TestCase):
def setUp(self):
self.import_igc_genes = ImportIGCGenes('test', 'test')
self.import_igc_genes._select_taxonomy = lambda x: x # Mock to return same dict
self.gene_dict = {
'gene_id': 'gene.01',
'length': 135,
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment