diff --git a/ippisite/db.sqlite3 b/ippisite/db.sqlite3 index c3e99fb2ae8e73ea41d75c9bf93ea2b62ad14a97..815d6c6f6d68c825abe74c75c9a73a46f35835b4 100644 Binary files a/ippisite/db.sqlite3 and b/ippisite/db.sqlite3 differ diff --git a/ippisite/ippidb/management/commands/import_v1_data.py b/ippisite/ippidb/management/commands/import_v1_data.py index 04bc53a12db29aad5f03b7f74e1acb93acec9276..33672008bda48f27ebc6116b659c1f0a042eaf09 100644 --- a/ippisite/ippidb/management/commands/import_v1_data.py +++ b/ippisite/ippidb/management/commands/import_v1_data.py @@ -2,12 +2,8 @@ import glob import re import copy -from django.utils import timezone -from django.db import transaction -from django.core.management import BaseCommand, CommandError +from django_diu.import_command import ImportCommand, ImportTask, ListImportTask, MysqlImportTask, MyConverter import mysql.connector -import requests_cache -import click from ippidb.models import Bibliography, Protein, Taxonomy, MolecularFunction, \ Domain, ProteinDomainBoundComplex, ProteinDomainPartnerComplex, Symmetry, Ppi, PpiComplex, Disease, \ @@ -16,142 +12,6 @@ from ippidb.models import Bibliography, Protein, Taxonomy, MolecularFunction, \ CompoundCytotoxicityResult, CompoundPKResult, PpiFamily -class MyConverter(mysql.connector.conversion.MySQLConverter): - - def row_to_python(self, row, fields): - row = super(MyConverter, self).row_to_python(row, fields) - - def to_unicode(col): - if type(col) == bytearray: - return col.decode('utf-8') - return col - - return[to_unicode(col) for col in row] - -class ImportTask(object): - - description = "Abstract import task" - - option = "" - - target_classes = {} - - main_class = None - - depends_on = [] - - def __init__(self, command, traceback=False, stop_on_fail=False, progress_bar=False): - self.out_stream = command.stdout - self.err_stream = command.stderr - self.style = command.style - self.traceback = traceback - self.stop_on_fail = stop_on_fail - self.progress_bar = progress_bar - self.done = False - - def _flush_target_models(self): - for target_class in self.target_classes: - self.out_stream.write( - 'Flushing {target_class} models...'.format(target_class=target_class.__name__)) - target_class.objects.all().delete() - self.out_stream.write(self.style.SUCCESS( - 'Successfully flushed {target_class} models!'.format(target_class=target_class.__name__))) - - def check_final_count(self): - if self.main_class: - count = self.main_class.objects.count() - if count==self.source_count: - self.out_stream.write(self.style.SUCCESS( - 'rows count ok for model {}: expected {}, counted {}'.format(self.main_class.__name__, self.source_count, count))) - else: - message = 'rows count not ok for model {}: expected {}, counted {}'.format( - self.main_class.__name__, self.source_count, count) - if self.stop_on_fail: - raise CommandError(message) - else: - self.out_stream.write(self.style.ERROR(message)) - - def migrate_row(self,row): - raise NotImplementedError() - - def open_data_source(self): - raise NotImplementedError() - - def post_process(self): - pass - - def _process_rows(self, rows): - for row in rows: - try: - new_object = None - with transaction.atomic(): - new_object = self.migrate_row(row) - except Exception as e: - if self.traceback: - import traceback - self.err_stream.write(self.style.NOTICE(traceback.format_exc())) - if self.stop_on_fail: - raise CommandError( - 'Failed inserting {}'.format(new_object)) - else: - self.out_stream.write(self.style.ERROR( - 'Failed inserting {}'.format(new_object))) - else: - if self.progress_bar == False: - self.out_stream.write(self.style.SUCCESS( - 'Successfully inserted {}'.format(new_object))) - - def count_source(self): - self.source_count = len(self.rows) - - def _run_import_loop(self): - self.open_data_source() - self.count_source() - if self.progress_bar is True: - with click.progressbar(self.rows, - label='Importing ' + self.description + ' (' + str(self.source_count) + ' rows to process).') as rows_list: - self._process_rows(rows_list) - else: - self._process_rows(self.rows) - self.post_process() - self.check_final_count() - self.done = True - - def check(self): - self.open_data_source() - self.count_source() - self.check_final_count() - - def run(self): - self._flush_target_models() - self._run_import_loop() - -class MysqlImportTask(ImportTask): - - description = "Abstract MySQL import task" - - outer_sql = "" - - def set_mysql_conn(self, conn): - self.conn = conn - - def get_cursor(self): - return self.conn.cursor() - - def open_data_source(self): - cursor = self.get_cursor() - cursor.execute(self.outer_sql) - self.rows = cursor.fetchall() - -class ListImportTask(ImportTask): - - description = "Abstract Python list import task" - - DATA = [] - - def open_data_source(self): - self.rows = self.DATA - class SymmetriesImportTask(ListImportTask): description = "Symmetries import" @@ -181,8 +41,13 @@ class SymmetriesImportTask(ListImportTask): symmetry.save() return symmetry +class IppiDBMySQLImportTask(MysqlImportTask): -class ProteinsImportTask(MysqlImportTask): + def __init__(self, command, **kwargs): + super().__init__(command, **kwargs) + self.conn = command.ippidb_source_conn + +class ProteinsImportTask(IppiDBMySQLImportTask): description = "Proteins import" @@ -202,7 +67,7 @@ class ProteinsImportTask(MysqlImportTask): return p -class BibliographyImportTask(MysqlImportTask): +class BibliographyImportTask(IppiDBMySQLImportTask): description = "Bibliography references import" @@ -233,7 +98,7 @@ class BibliographyImportTask(MysqlImportTask): return b -class DomainImportTask(MysqlImportTask): +class DomainImportTask(IppiDBMySQLImportTask): description = "Domains import" @@ -254,7 +119,7 @@ class DomainImportTask(MysqlImportTask): return d -class CompoundImportTask(MysqlImportTask): +class CompoundImportTask(IppiDBMySQLImportTask): description = "Compound import" @@ -327,7 +192,7 @@ class CompoundImportTask(MysqlImportTask): return compound -class RefCompoundBiblioImportTask(MysqlImportTask): +class RefCompoundBiblioImportTask(IppiDBMySQLImportTask): description = "RefCompoundBiblio import" @@ -359,7 +224,7 @@ class RefCompoundBiblioImportTask(MysqlImportTask): return r -class PpiImportTask(MysqlImportTask): +class PpiImportTask(IppiDBMySQLImportTask): description = "PPIs import" @@ -574,7 +439,7 @@ class AdditionalCasImportTask(ListImportTask): return ca -class TestActivityDescriptionImportTask(MysqlImportTask): +class TestActivityDescriptionImportTask(IppiDBMySQLImportTask): description = "TestActivityDescription import" @@ -616,7 +481,7 @@ class TestActivityDescriptionImportTask(MysqlImportTask): tad.save() return tad -class CompoundActivityResultImportTask(MysqlImportTask): +class CompoundActivityResultImportTask(IppiDBMySQLImportTask): description = "CompoundActivityResult import" @@ -646,7 +511,7 @@ class CompoundActivityResultImportTask(MysqlImportTask): return car -class TestCytotoxDescriptionImportTask(MysqlImportTask): +class TestCytotoxDescriptionImportTask(IppiDBMySQLImportTask): description = "TestCytotoxDescription import" @@ -679,7 +544,7 @@ class TestCytotoxDescriptionImportTask(MysqlImportTask): tcd.save() return tcd -class CompoundCytotoxicityResultImportTask(MysqlImportTask): +class CompoundCytotoxicityResultImportTask(IppiDBMySQLImportTask): description = "CompoundCytotoxicityResult import" @@ -706,7 +571,7 @@ class CompoundCytotoxicityResultImportTask(MysqlImportTask): return ccr -class TestPKDescriptionImportTask(MysqlImportTask): +class TestPKDescriptionImportTask(IppiDBMySQLImportTask): description = "TestPKDescription import" @@ -750,7 +615,7 @@ class TestPKDescriptionImportTask(MysqlImportTask): return tpd -class CompoundPKResultImportTask(MysqlImportTask): +class CompoundPKResultImportTask(IppiDBMySQLImportTask): description = "CompoundPKResult import" @@ -784,7 +649,7 @@ class CompoundPKResultImportTask(MysqlImportTask): cpr.save() return cpr -class Command(BaseCommand): +class Command(ImportCommand): help = "Import iPPI-DB data from the MySQL database" task_classes = [ProteinsImportTask, @@ -803,93 +668,6 @@ class Command(BaseCommand): CompoundPKResultImportTask ] - def add_arguments(self, parser): - task_choices = [task_class.option for task_class in self.task_classes] - task_choices.append('all') - task_help = 'Import task to be run.\n ' - task_help += ',\n \n'.join([task_class.option + ': ' + task_class.description for task_class in self.task_classes]) - task_help += ',\n \nall: import everything.' - parser.add_argument( - 'task', - type=str, - choices=task_choices, - help=task_help, - ) - parser.add_argument( - '--all', - action='store_true', - dest='all', - default=False, - help='import everything', - ) - parser.add_argument( - '--wscache', - action='store_true', - dest='wscache', - default=False, - help='use web services cache', - ) - parser.add_argument( - '--errortb', - action='store_true', - dest='errortb', - default=False, - help='show tracebacks on errors', - ) - parser.add_argument( - '--stoponfail', - action='store_true', - dest='stoponfail', - default=False, - help='stop on first error', - ) - parser.add_argument( - '--progressbar', - action='store_true', - dest='progress_bar', - default=False, - help='show progressbar instead of logging inserted entries', - ) - parser.add_argument( - '--check', - action='store_true', - dest='check', - default=False, - help='only check instead of running import', - ) - def handle(self, *args, **options): - conn = mysql.connector.connect( - converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb") - if options.get('wscache'): - requests_cache.install_cache('ws_cache') - task_option = options.get('task') - # map task names to task classes - option_to_task = {task_class.option: task_class for task_class in self.task_classes} - # map task names to the list of task names that depend upon them - dependency_to_task = {task_class.option: [] for task_class in self.task_classes} - for task_class in self.task_classes: - for task_depency_class in task_class.depends_on: - dependency_to_task[task_depency_class.option].append(task_class.option) - dependency_to_task['all'] = option_to_task.keys() - def append_dependencies(option): - dependencies = set([option]) - for depending_option in dependency_to_task.get(option,[]): - dependencies.update(append_dependencies(depending_option)) - return dependencies - if task_option: - # list of tasks to run because they depend on initial option, - # directly or not - dependencies = append_dependencies(task_option) - print(dependencies) - dependencies_classes = {task_class for opt, task_class in option_to_task.items() if opt in dependencies} - conn = mysql.connector.connect( - converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb") - while len(dependencies_classes)>0: - for task_class in copy.copy(dependencies_classes): - if not(set(task_class.depends_on) & dependencies_classes): - task = task_class(self, options['errortb'], options['stoponfail'], options['progress_bar']) - if hasattr(task, 'set_mysql_conn'): - task.set_mysql_conn(conn) - task.run() - dependencies_classes.remove(task_class) + self.ippidb_source_conn = mysql.connector.connect( + converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb") \ No newline at end of file diff --git a/ippisite/requirements-core.txt b/ippisite/requirements-core.txt index 6d8bbdcc121fe433746260371af23f995ed54760..1875e4e32414a68f6f4c35959203ca9cf1e171cb 100644 --- a/ippisite/requirements-core.txt +++ b/ippisite/requirements-core.txt @@ -18,5 +18,6 @@ scipy matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4. # postgres driver psycopg2 +git+ssh://git@gitlab.pasteur.fr/hmenager/django-diu.git#egg=django_diu # openbabel is not installed from pip for now #openbabel \ No newline at end of file