class ImportEggNog(object):
    """Create or update ``EggNog`` entries from an EggNog annotation file.

    Each line of the file is parsed with ``EggNogAnnotationLineParser``,
    linked to its ``EggNogFunctionalCategory`` and then written to the
    database. Counters (``processed_count``, ``created_count``,
    ``updated_count``, ``skipped_count``) and ``skipped_ids`` describe the
    outcome once ``load_all`` has run.
    """

    def __init__(self, file_path):
        """Prepare an import of the annotation file located at *file_path*."""
        self.annotation_file = file_path
        self.eggnog_parser = EggNogAnnotationLineParser()
        # Filled by _build_functional_category_dict(); None means "not loaded yet".
        self.functional_cat = None
        self.total_eggnog_nb = 0
        self.processed_count = 0
        self.created_count = 0
        self.updated_count = 0
        self.skipped_count = 0
        self.skipped_ids = []

    def _build_functional_category_dict(self):
        """Cache every functional category, keyed by its ``category_id``."""
        all_categories = EggNogFunctionalCategory.objects.all()
        self.functional_cat = {cat.category_id: cat for cat in all_categories}

    def link_functional_category(self, eggnog_dict):
        """Replace the category key of *eggnog_dict* by the category object.

        The dict is modified in place. A missing or empty category falls back
        to ``'S'``; a key that matches no known category yields ``None``.
        """
        if self.functional_cat is None:
            self._build_functional_category_dict()
        # ``or`` (rather than a .get() default) so that an empty string also
        # falls back to 'S' -- presumably the "function unknown" category,
        # TODO confirm against the EggNogFunctionalCategory table.
        cat_key = eggnog_dict.get('functional_category') or 'S'
        eggnog_dict['functional_category'] = self.functional_cat.get(cat_key)

    def _create_or_update(self, payload):
        """Insert or update one entry and account for it in the counters."""
        function_id = payload.get('function_id')
        defaults = {k: v for k, v in payload.items() if k != 'function_id'}
        try:
            # update_or_create() does its work inside its own atomic block, so
            # a constraint violation is rolled back cleanly and can never
            # surface as an unhandled DoesNotExist from a fallback lookup.
            _eggnog, created = EggNog.objects.update_or_create(
                function_id=function_id, defaults=defaults
            )
        except IntegrityError:
            self.skipped_ids.append(function_id)
            self.skipped_count += 1
            return
        if created:
            self.created_count += 1
        else:
            self.updated_count += 1

    def load_all(self, test=False):
        """Import every line of the annotation file.

        When *test* is true the import stops once the first 1000 lines have
        been processed.
        """
        self._build_functional_category_dict()
        self.total_eggnog_nb = file_len(self.annotation_file)
        with open(self.annotation_file, "r") as file:
            for line in file:
                eggnog_dict = self.eggnog_parser.get_dict(line)
                self.link_functional_category(eggnog_dict)
                # Drop empty fields so they are neither inserted nor used to
                # overwrite values already stored.
                payload = {k: v for k, v in eggnog_dict.items() if v != ""}
                self._create_or_update(payload)
                self.processed_count += 1
                if self.processed_count % 1000 == 0:
                    logger.info("%s/%s EggNog processed so far...", self.processed_count, self.total_eggnog_nb)
                    if test:
                        break
        logger.info("[DONE] %s/%s EggNog created.", self.created_count, self.total_eggnog_nb)
        logger.info("[DONE] %s/%s EggNog updated.", self.updated_count, self.total_eggnog_nb)
        logger.info("[DONE] %s/%s EggNog skipped. List: %s", self.skipped_count, self.total_eggnog_nb,
                    self.skipped_ids)
class Command(BaseCommand):
    """Management command that runs ``ImportEggNog`` on an annotation file."""

    help = 'Create or update all Eggnog entries from annotations.tsv file.'

    def add_arguments(self, parser):
        """Declare the positional annotation file and the ``--test`` flag."""
        parser.add_argument(
            'annotation',
            help='annotations.tsv file from EggNog',
        )
        parser.add_argument(
            '--test',
            action='store_true',
            help='Run only on first 1000 entries.',
        )

    def set_logger_level(self, verbosity):
        """Raise the module logger to INFO (verbosity 2) or DEBUG (above 2)."""
        if verbosity > 2:
            level = logging.DEBUG
        elif verbosity > 1:
            level = logging.INFO
        else:
            # Verbosity 0 or 1: leave the logger level untouched.
            return
        logger.setLevel(level)

    def handle(self, *args, **options):
        """Configure logging, then import the requested annotation file."""
        verbosity = int(options['verbosity'])
        self.set_logger_level(verbosity)
        importer = ImportEggNog(options['annotation'])
        importer.load_all(test=options['test'])
def file_len(file_path):
    """Return the number of lines in the file at *file_path*.

    An empty file yields 0. A last line without a trailing newline still
    counts as a line.
    """
    with open(file_path) as f:
        # Counting while iterating keeps memory flat and, unlike relying on
        # the last loop index, stays well defined when the file has no line.
        return sum(1 for _ in f)
class TestFileLength(TestCase):
    """Check ``file_len`` against the sample annotation summary fixture."""

    def test_file_length(self):
        # The path is relative: the test has to run from the directory that
        # contains ``dev_data``.
        expected_line_count = 1002
        observed_line_count = file_len("./dev_data/IGC_sample.annotation_OF.summary")
        self.assertEqual(observed_line_count, expected_line_count)