Commit 793e61eb authored by Kenzo-Hugo Hillion
Browse files

add script to create functional categories from manage

parent e7f37e19
Pipeline #19789 passed with stages
in 2 minutes and 24 seconds
import logging
from django.core.management.base import BaseCommand
from django.core.exceptions import ValidationError
from metagenedb.apps.catalog.models import EggNogFunctionalCategory
from metagenedb.common.utils.parsers.eggnog import EggNOGFunctionalCategoriesParser
logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)
class ImportEggNOGFunctionalCategories(object):
    """Create or update EggNOG functional categories parsed from a file.

    Each entry yielded by the parser is first created as a new ``MODEL``
    instance. When validation reports a problem on ``LOOKUP_FIELD`` the
    entry is instead applied as an update to the existing instance. Entries
    that can neither be created nor updated are recorded as skipped.
    """

    # Field used to look up an already existing instance.
    LOOKUP_FIELD = 'category_id'
    MODEL = EggNogFunctionalCategory
    # Group label found in the parsed entries -> value stored in the 'group' field.
    GROUP_MAPPING = {
        'Information storage and processing': 'info_storage_processing',
        'Cellular processes and signaling': 'cellular_processes_signaling',
        'Metabolism': 'metabolism',
        'Poorly characterized': 'poorly_characterized'
    }

    def __init__(self, file_path):
        """Set up the parser for ``file_path`` and reset all counters."""
        self.parser = EggNOGFunctionalCategoriesParser(file_path)
        self.processed_count = 0
        self.created_count = 0
        self.updated_count = 0
        self.skipped_count = 0
        self.skipped_ids = []
        self.skipped_errors = []

    def _create_instance(self, payload):
        """Validate and save a new instance built from ``payload``.

        Raises:
            ValidationError: if ``full_clean`` rejects the payload.
        """
        instance = self.MODEL(**payload)
        instance.full_clean()
        instance.save()
        self.created_count += 1

    def _update_instance(self, payload):
        """Apply ``payload`` to the instance matching its lookup field value.

        Raises:
            ValidationError: if ``full_clean`` rejects the updated instance.
        """
        instance = self.MODEL.objects.get(**{self.LOOKUP_FIELD: payload.get(self.LOOKUP_FIELD)})
        for k, v in payload.items():
            setattr(instance, k, v)
        instance.full_clean()
        instance.save()
        self.updated_count += 1

    def _handle_error(self, payload, error):
        """Log ``error`` and record ``payload`` as skipped."""
        logger.error(error)
        self.skipped_errors.append(error)
        self.skipped_ids.append(payload.get(self.LOOKUP_FIELD))
        self.skipped_count += 1

    def update_group_name(self, functional_category):
        """Replace the entry's 'group' label by its ``GROUP_MAPPING`` value.

        The dictionary is modified in place and returned. A label missing
        from ``GROUP_MAPPING`` is replaced by ``None``.
        """
        functional_category['group'] = self.GROUP_MAPPING.get(functional_category['group'], None)
        return functional_category

    def load_all(self):
        """Create or update every parsed entry, then log a summary.

        Any entry that cannot be stored is passed to ``_handle_error`` so
        that it is logged and counted as skipped instead of being lost.
        """
        for functional_category in self.parser.parse():
            functional_category = self.update_group_name(functional_category)
            try:
                self._create_instance(functional_category)
            except ValidationError as creation_error:
                # error_dict only exists when the error was built from a dict.
                failing_fields = getattr(creation_error, 'error_dict', {})
                if self.LOOKUP_FIELD in failing_fields:
                    try:
                        self._update_instance(functional_category)
                    except ValidationError as update_error:
                        self._handle_error(functional_category, update_error)
                    except self.MODEL.DoesNotExist:
                        # The lookup field failed validation for another
                        # reason than an existing entry: nothing to update,
                        # so report the original creation error.
                        self._handle_error(functional_category, creation_error)
                else:
                    # Previously ignored: the entry was dropped without any
                    # log line and without being counted as skipped.
                    self._handle_error(functional_category, creation_error)
            self.processed_count += 1
        logger.info("[DONE] %s EggNOG functional categories created.", self.created_count)
        logger.info("[DONE] %s EggNOG functional categories updated.", self.updated_count)
        logger.info("[DONE] %s EggNOG functional categories skipped. List: %s", self.skipped_count, self.skipped_ids)
class Command(BaseCommand):
    """Management command that imports EggNOG functional categories from a file."""

    help = 'Create or update all EggNOG functional categories from COG_functional_categories.txt file.'

    def add_arguments(self, parser):
        """Register the positional argument holding the path of the input file."""
        parser.add_argument(
            'functional_categories',
            help='COG_functional_categories.txt file from EggNOG',
        )

    def set_logger_level(self, verbosity):
        """Set the module logger to DEBUG above 2, INFO above 1, else leave it as is."""
        if verbosity > 1:
            chosen_level = logging.DEBUG if verbosity > 2 else logging.INFO
            logger.setLevel(chosen_level)

    def handle(self, *args, **options):
        """Configure logging from the verbosity option, then run the import."""
        requested_verbosity = int(options['verbosity'])
        self.set_logger_level(requested_verbosity)
        source_file = options['functional_categories']
        importer = ImportEggNOGFunctionalCategories(source_file)
        importer.load_all()
......@@ -11,7 +11,7 @@ logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)
class ImportEggNog(object):
class ImportEggNOG(object):
def __init__(self, file_path):
self.annotation_file = file_path
......@@ -71,20 +71,20 @@ class ImportEggNog(object):
eggnog.save()
self.processed_count += 1
if self.processed_count % 1000 == 0:
logger.info("%s/%s EggNog processed so far...", self.processed_count, self.total_eggnog_nb)
logger.info("%s/%s EggNOG processed so far...", self.processed_count, self.total_eggnog_nb)
if test:
break
logger.info("[DONE] %s/%s EggNog created.", self.created_count, self.total_eggnog_nb)
logger.info("[DONE] %s/%s EggNog updated.", self.updated_count, self.total_eggnog_nb)
logger.info("[DONE] %s/%s EggNog skipped. List: %s", self.skipped_count, self.total_eggnog_nb,
logger.info("[DONE] %s/%s EggNOG created.", self.created_count, self.total_eggnog_nb)
logger.info("[DONE] %s/%s EggNOG updated.", self.updated_count, self.total_eggnog_nb)
logger.info("[DONE] %s/%s EggNOG skipped. List: %s", self.skipped_count, self.total_eggnog_nb,
self.skipped_ids)
class Command(BaseCommand):
help = 'Create or update all Eggnog entries from annotations.tsv file.'
help = 'Create or update all EggNOG entries from annotations.tsv file.'
def add_arguments(self, parser):
parser.add_argument('annotation', help='annotations.tsv file from EggNog')
parser.add_argument('annotation', help='annotations.tsv file from EggNOG')
parser.add_argument('--test', action='store_true', help='Run only on first 1000 entries.')
def set_logger_level(self, verbosity):
......@@ -95,5 +95,5 @@ class Command(BaseCommand):
def handle(self, *args, **options):
self.set_logger_level(int(options['verbosity']))
import_eggnog = ImportEggNog(options['annotation'])
import_eggnog = ImportEggNOG(options['annotation'])
import_eggnog.load_all(test=options['test'])
from unittest import TestCase
from metagenedb.apps.catalog.management.commands.create_eggnog_functional_cat import ImportEggNOGFunctionalCategories
class TestUpdateGroupName(TestCase):
    """Check that update_group_name translates each known group label."""

    def _assert_group_translated(self, raw_group, expected_group):
        """Run update_group_name on a payload using ``raw_group`` and compare the result."""
        importer = ImportEggNOGFunctionalCategories('test')
        category = {
            'category_id': 'A',
            'name': 'Test',
            'group': raw_group
        }
        wanted = {
            'category_id': 'A',
            'name': 'Test',
            'group': expected_group
        }
        self.assertDictEqual(importer.update_group_name(category), wanted)

    def test_update_group_name_information(self):
        self._assert_group_translated(
            'Information storage and processing', 'info_storage_processing'
        )

    def test_update_group_name_cellular(self):
        self._assert_group_translated(
            'Cellular processes and signaling', 'cellular_processes_signaling'
        )

    def test_update_group_name_metabolism(self):
        self._assert_group_translated('Metabolism', 'metabolism')

    def test_update_group_name_poorly(self):
        self._assert_group_translated('Poorly characterized', 'poorly_characterized')
......@@ -9,7 +9,8 @@ class FileParser:
"""
for line in file_handler:
print(line.rstrip())
return None
def parse(self):
with open(self.file_path, 'r') as file:
self.handle_parsing(file)
return self.handle_parsing(file)
......@@ -39,7 +39,7 @@ class EggNOGFunctionalCategoriesParser(FileParser):
functional_categories.append({
'category_id': elements[0][1],
'name': elements[1],
'group': current_group
'group': current_group.capitalize()
})
elif line: # It is a group of a category
current_group = line
......
......@@ -63,9 +63,9 @@ class TestEggNOGFunctionalCategoriesParser(TestCase):
"SECOND GROUP\n", " [C] Categorie name C\n", " [D] Categorie name D\n",
]
expected_list = [
{'category_id': 'A', 'group': 'FIRST GROUP', 'name': 'Categorie name A'},
{'category_id': 'B', 'group': 'FIRST GROUP', 'name': 'Categorie name B'},
{'category_id': 'C', 'group': 'SECOND GROUP', 'name': 'Categorie name C'},
{'category_id': 'D', 'group': 'SECOND GROUP', 'name': 'Categorie name D'}
{'category_id': 'A', 'group': 'First group', 'name': 'Categorie name A'},
{'category_id': 'B', 'group': 'First group', 'name': 'Categorie name B'},
{'category_id': 'C', 'group': 'Second group', 'name': 'Categorie name C'},
{'category_id': 'D', 'group': 'Second group', 'name': 'Categorie name D'}
]
self.assertListEqual(parser.handle_parsing(fake_file_handler), expected_list)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment