diff --git a/ippisite/ippidb/admin.py b/ippisite/ippidb/admin.py index 6253bfa4ae4ae5033aea3b87f0360a72e6344892..886f7d3395b8bdd61b2b4f650564ad2390010aea 100644 --- a/ippisite/ippidb/admin.py +++ b/ippisite/ippidb/admin.py @@ -70,6 +70,9 @@ class TextActivityDescription(admin.ModelAdmin): @admin.register(Ppi) class PpiModelAdmin(admin.ModelAdmin): filter_horizontal = ('diseases',) + list_display = ('pdb_id', 'name', 'symmetry', 'family') + list_filter = ('diseases',) + search_fields = ('pdb_id', 'name', 'symmetry', 'family__name', 'diseases__name') @admin.register(ProteinDomainComplex) diff --git a/ippisite/ippidb/management/commands/import_v1_data.py b/ippisite/ippidb/management/commands/import_v1_data.py index 6386137e879898b8146389dd9c2ef2f49418f439..04bc53a12db29aad5f03b7f74e1acb93acec9276 100644 --- a/ippisite/ippidb/management/commands/import_v1_data.py +++ b/ippisite/ippidb/management/commands/import_v1_data.py @@ -1,9 +1,13 @@ import glob import re +import copy from django.utils import timezone +from django.db import transaction from django.core.management import BaseCommand, CommandError import mysql.connector +import requests_cache +import click from ippidb.models import Bibliography, Protein, Taxonomy, MolecularFunction, \ Domain, ProteinDomainBoundComplex, ProteinDomainPartnerComplex, Symmetry, Ppi, PpiComplex, Disease, \ @@ -24,732 +28,868 @@ class MyConverter(mysql.connector.conversion.MySQLConverter): return[to_unicode(col) for col in row] +class ImportTask(object): + + description = "Abstract import task" + + option = "" + + target_classes = {} + + main_class = None + + depends_on = [] + + def __init__(self, command, traceback=False, stop_on_fail=False, progress_bar=False): + self.out_stream = command.stdout + self.err_stream = command.stderr + self.style = command.style + self.traceback = traceback + self.stop_on_fail = stop_on_fail + self.progress_bar = progress_bar + self.done = False + + def _flush_target_models(self): + for target_class in self.target_classes: + self.out_stream.write( + 'Flushing {target_class} models...'.format(target_class=target_class.__name__)) + target_class.objects.all().delete() + self.out_stream.write(self.style.SUCCESS( + 'Successfully flushed {target_class} models!'.format(target_class=target_class.__name__))) + + def check_final_count(self): + if self.main_class: + count = self.main_class.objects.count() + if count==self.source_count: + self.out_stream.write(self.style.SUCCESS( + 'rows count ok for model {}: expected {}, counted {}'.format(self.main_class.__name__, self.source_count, count))) + else: + message = 'rows count not ok for model {}: expected {}, counted {}'.format( + self.main_class.__name__, self.source_count, count) + if self.stop_on_fail: + raise CommandError(message) + else: + self.out_stream.write(self.style.ERROR(message)) + + def migrate_row(self,row): + raise NotImplementedError() + + def open_data_source(self): + raise NotImplementedError() + + def post_process(self): + pass + + def _process_rows(self, rows): + for row in rows: + try: + new_object = None + with transaction.atomic(): + new_object = self.migrate_row(row) + except Exception as e: + if self.traceback: + import traceback + self.err_stream.write(self.style.NOTICE(traceback.format_exc())) + if self.stop_on_fail: + raise CommandError( + 'Failed inserting {}'.format(new_object)) + else: + self.out_stream.write(self.style.ERROR( + 'Failed inserting {}'.format(new_object))) + else: + if self.progress_bar == False: + self.out_stream.write(self.style.SUCCESS( + 'Successfully inserted {}'.format(new_object))) + + def count_source(self): + self.source_count = len(self.rows) + + def _run_import_loop(self): + self.open_data_source() + self.count_source() + if self.progress_bar is True: + with click.progressbar(self.rows, + label='Importing ' + self.description + ' (' + str(self.source_count) + ' rows to process).') as rows_list: + self._process_rows(rows_list) + else: + self._process_rows(self.rows) + self.post_process() + self.check_final_count() + self.done = True + + def check(self): + self.open_data_source() + self.count_source() + self.check_final_count() + + def run(self): + self._flush_target_models() + self._run_import_loop() + +class MysqlImportTask(ImportTask): + + description = "Abstract MySQL import task" + + outer_sql = "" + + def set_mysql_conn(self, conn): + self.conn = conn + + def get_cursor(self): + return self.conn.cursor() + + def open_data_source(self): + cursor = self.get_cursor() + cursor.execute(self.outer_sql) + self.rows = cursor.fetchall() + +class ListImportTask(ImportTask): + + description = "Abstract Python list import task" + + DATA = [] + + def open_data_source(self): + self.rows = self.DATA + +class SymmetriesImportTask(ListImportTask): + + description = "Symmetries import" + + option = "symmetries" + + target_classes = [Symmetry] + + main_class = Symmetry + + DATA = [ + ['AS', 'asymmetric'], + ['C2', 'C2 symmetry'], + ['D2', 'D2 symmetry'], + ['C3', 'C3 symmetry'], + ['D3', 'D3 symmetry'], + ['C4', 'C4 symmetry'], + ['D4', 'D4 symmetry'], + ['C5', 'C5 symmetry'], + ['D5', 'D5 symmetry'] + ] + + def migrate_row(self, row): + symmetry = Symmetry() + symmetry.code = row[0] + symmetry.description = row[1] + symmetry.save() + return symmetry + + +class ProteinsImportTask(MysqlImportTask): + + description = "Proteins import" + + outer_sql = "SELECT * FROM protein" + + option = "prot" + + target_classes = [Protein] + + main_class = Protein + + def migrate_row(self, row): + p = Protein() + p.id = row[0] + p.uniprot_id = row[1] + p.save(autofill=True) + return p + + +class BibliographyImportTask(MysqlImportTask): + + description = "Bibliography references import" + + outer_sql = "SELECT * FROM biblio" + + option = "bib" + + target_classes = [Bibliography] + + main_class = Bibliography + + def migrate_row(self, row): + b = Bibliography() + b.id = row[0] + if row[1] == 'article': + b.source = 'PM' + else: + b.source = 'PT' + b.id_source = row[2] + b.cytotox = row[6]=='Y' + b.in_silico = row[7]=='Y' + b.in_vitro = row[8]=='Y' + b.in_vivo = row[9]=='Y' + b.in_cellulo = row[10]=='Y' + b.pharmacokinetic = row[11]=='Y' + b.xray = row[12]=='Y' + b.save(autofill=True) + return b + + +class DomainImportTask(MysqlImportTask): + + description = "Domains import" + + outer_sql= "SELECT * FROM domain" + + option = "dom" + + target_classes = [Domain, Taxonomy, MolecularFunction] + + main_class = Domain + + def migrate_row(self, row): + d = Domain() + d.id = row[0] + d.pfam_acc = row[2] + d.domain_family = row[4] + d.save(autofill=True) + return d + + +class CompoundImportTask(MysqlImportTask): + + description = "Compound import" + + outer_sql= "SELECT * FROM compound" + + option = "com" + + target_classes = [Compound] + + main_class = Compound + + def migrate_row(self, row): + compound = Compound() + compound.id = row[0] + compound.canonical_smile = row[1] + compound.is_macrocycle = (row[4] == 'Y') + compound.aromatic_ratio = row[5] + compound.balaban_index = row[6] + compound.fsp3 = row[7] # Csp3Ratio + compound.gc_molar_refractivity = row[ + 10] # GCMolarRefractivity + compound.log_d = row[13] # LogD + compound.a_log_p = row[14] # ALogP + compound.mean_atom_vol_vdw = row[15] # MeanAtomVolVdW + compound.molecular_weight = row[16] # MolecularWeight + compound.nb_acceptor_h = row[17] # NbAcceptorH + compound.nb_aliphatic_amines = row[ + 18] # NbAliphaticsAmines + compound.nb_aromatic_bonds = row[19] # NbAromaticBonds + compound.nb_aromatic_ether = row[20] # NbAromaticsEther + compound.nb_aromatic_sssr = row[21] # NbAromaticsSSSR + compound.nb_atom = row[22] # NbAtom + compound.nb_atom_non_h = row[23] # NbAtomNonH + compound.nb_benzene_like_rings = row[24] # NbBenzLikeRings + compound.nb_bonds = row[25] # NbBonds + compound.nb_bonds_non_h = row[26] # NbBondsNonH + compound.nb_br = row[27] # NbBr + compound.nb_c = row[28] # NbC + compound.nb_chiral_centers = row[29] # NbChiralCenters + compound.nb_circuits = row[30] # NbCircuits + compound.nb_cl = row[31] # NbCl + compound.nb_csp2 = row[32] # NbCsp2 + compound.nb_csp3 = row[33] # NbCsp3 + compound.nb_donor_h = row[34] # NbDonorH + compound.nb_double_bonds = row[35] # NbDoubleBonds + compound.nb_f = row[36] # NbF + compound.nb_i = row[37] # NbI + compound.nb_multiple_bonds = row[38] # NbMultBonds + compound.nb_n = row[39] # NbN + compound.nb_o = row[40] # NbO + compound.nb_rings = row[41] # NbRings + compound.nb_rotatable_bonds = row[42] # NbRotatableBonds + compound.randic_index = row[44] # RandicIndex + compound.rdf070m = row[45] # RDF070m + compound.rotatable_bond_fraction = row[ + 46] # RotatableBondFraction + compound.sum_atom_polar = row[47] # SumAtomPolar + compound.sum_atom_vol_vdw = row[48] # SumAtomVolVdW + compound.tpsa = row[51] # TPSA + compound.ui = row[52] # Ui + compound.wiener_index = row[54] # WienerIndex + if row[55] != 'N': + compound.common_name = row[55] # CmpdNameSh + compound.pubchem_id = row[56] # IdPubchem + if row[57] != 'N': + compound.chemspider_id = row[57] # IdPubchem + compound.chembl_id = row[58] + compound.iupac_name = row[59] + compound.save(autofill=True) + return compound + + +class RefCompoundBiblioImportTask(MysqlImportTask): + + description = "RefCompoundBiblio import" + + outer_sql= """ + select r.CmpdNameInBiblio, c.CanonicalSmile, b.IDSource + from refCmpdBiblio as r inner join compound as c + on r.IDCompound=c.IDCompound + inner join biblio as b on r.IDBiblio=b.IDBiblio;""" + + option = "rcb" + + target_classes = [RefCompoundBiblio] + + main_class = RefCompoundBiblio + + depends_on = [BibliographyImportTask, CompoundImportTask] + + def migrate_row(self, row): + c = Compound.objects.get(canonical_smile=row[1]) + b = Bibliography.objects.get(id_source=row[2]) + r = RefCompoundBiblio() + r.compound_id = c.id + r.bibliography_id = b.id + r.compound_name = '-'.join(re.split('[-.]',row[0])[1:]) + # mysql format for this field is [PMID/WIPOID]-name: we remove the first part + # sometimes there is a . instead of the dash + # sometimes we have more than one - + r.save() + return r + + +class PpiImportTask(MysqlImportTask): + + description = "PPIs import" + + outer_sql= """ + select distinct protein.NumUniprot, domain.PfamNumAccession, complexe.NbCopy, cmpdAction.IDComplexeBound, bindingSiteEvidence.CodePDB, + 'part1', ppi.IDPPI, disease.Disease, complexe.IDComplexe, ppi.Family from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite1) + inner join complexe on (ppi.IDComplexe1=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) + inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) + inner join disease on (disease.IDPPI=ppi.IDPPI) left outer join bindingSiteEvidence on (ppi.IDPPI=bindingSiteEvidence.IDPPI) + union + select distinct protein.NumUniprot, domain.PfamNumAccession , complexe.NbCopy, cmpdAction.IDComplexeBound, null, + 'part2', ppi.IDPPI, disease.Disease, complexe.IDComplexe, ppi.Family from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite2) + inner join complexe on (ppi.IDComplexe2=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) + inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) + inner join disease on (disease.IDPPI=ppi.IDPPI) + """ + + option = "ppi" + + target_classes = [ProteinDomainBoundComplex, ProteinDomainPartnerComplex, Disease, Ppi, PpiFamily, PpiComplex, CompoundAction] + + main_class = Ppi + + depends_on = [ProteinsImportTask, DomainImportTask, CompoundImportTask] + + def migrate_row(self, row): + cursor_aux = self.get_cursor() + cursor_aux2 = self.get_cursor() + # create or retrieve Ppi object + if row[5] == 'part1': + ppi = Ppi() + ppi.id = row[6] + disease, created = Disease.objects.get_or_create( + name=row[7]) + ppi_family, created = PpiFamily.objects.get_or_create( + name=row[9].strip()) + ppi.family = ppi_family + ppi.pdb_id = row[4] + ppi.pockets_nb = 1 + ppi.symmetry = Symmetry.objects.get(code='AS') + ppi.save() + ppi.diseases.add(disease) + ppi.save() + else: + ppi = Ppi.objects.get(id=row[6]) + # create a complex + if row[3] is None: + c = ProteinDomainPartnerComplex() + else: + c = ProteinDomainBoundComplex() + c.id = row[8] + protein = Protein.objects.get(uniprot_id=row[0]) + c.protein = protein + domain = Domain.objects.get(pfam_acc=row[1]) + c.domain = domain + c.ppc_copy_nb = row[2] + if isinstance(c, ProteinDomainBoundComplex): + c.ppp_copy_nb_per_p = 1 + c.save() + # create the PpiComplex object + ppi_complex = PpiComplex() + ppi_complex.ppi = ppi + ppi_complex.complex = c + ppi_complex.cc_nb = 1 + ppi_complex.save() + if row[3] is not None: + sql_ca_string = '''select distinct c.CanonicalSmile from cmpdAction as ca + inner join compound as c on ca.IDCompound=c.IDCompound where ca.IDComplexeBound='''\ + + str(row[3]) + cursor_aux.execute(sql_ca_string) + self.rows_aux = cursor_aux.fetchall() + for row_aux in self.rows_aux: + canonical_smile = row_aux[0] + sql_bse_string = '''select CodePDB from bindingSiteEvidence where IDPPI=''' + str(ppi.id) + cursor_aux2.execute(sql_bse_string) + row_aux2 = cursor_aux2.fetchone() + if row_aux2 is None: + pdb_id_ca = None + else: + pdb_id_ca = row_aux2[0] + c = Compound.objects.get(canonical_smile=canonical_smile) + ca = CompoundAction() + ca.compound = c + ca.ppi = ppi + ca.activation_mode = 'U' + ca.nb_copy_compounds = 1 + ca.pdb_id = pdb_id_ca + ca.save() + return ppi + + def post_process(self): + # add names to PPIs automatically once the PPI complexes which the names + # depend on have been inserted + for ppi in Ppi.objects.all(): + ppi.save(autofill=True) + + + def check_final_count(self): + self.source_count = int(self.source_count/2) + super().check_final_count() + +class AdditionalCasImportTask(ListImportTask): + + description = "Additional Compound Actions import (hardcoded because not in the source file)" + + DATA = [ + {'compound_id':15, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':36, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':57, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':69, 'ppi_name': 'IL2 / IL2RA'}, + {'compound_id':78, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':110, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':111, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':120, 'ppi_name': 'XIAP / DBLOH'}, + {'compound_id':124, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':144, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':171, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':213, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':221, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':238, 'ppi_name': 'IL2 / IL2RA'}, + {'compound_id':272, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':305, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':316, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':354, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':355, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':399, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':406, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':409, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':412, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':458, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':463, 'ppi_name': 'IL2 / IL2RA'}, + {'compound_id':465, 'ppi_name': 'XIAP / DBLOH'}, + {'compound_id':469, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':477, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':485, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':487, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':490, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':538, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':563, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':585, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':590, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':626, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':666, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':683, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':702, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':705, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':714, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':734, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':769, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':786, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':801, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':814, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':817, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':886, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':971, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':977, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':999, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1011, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1026, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1031, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1055, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1056, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1069, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1115, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1122, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1126, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1153, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1155, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1160, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1202, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1221, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1251, 'ppi_name': 'XIAP / DBLOH'}, + {'compound_id':1268, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1280, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1296, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1297, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1310, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1313, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1317, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1376, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1398, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1402, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1403, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1407, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1434, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1453, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1477, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1481, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1504, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1514, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1553, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1594, 'ppi_name': 'BCL2 / BAK'}, + {'compound_id':1648, 'ppi_name': 'MDM2 / P53'}, + {'compound_id':1649, 'ppi_name': 'MDM2 / P53'}, + ] + + option = "acas" + + target_classes = [CompoundAction] + + main_class = CompoundAction + + depends_on = [CompoundImportTask, PpiImportTask] + + def migrate_row(self, row): + ca = CompoundAction() + ca.compound = Compound.objects.get(id=row['compound_id']) + ca.ppi = Ppi.objects.get(name=row['ppi_name']) + ca.activation_mode = 'U' + ca.nb_copy_compounds = 1 + ca.save() + return ca + + +class TestActivityDescriptionImportTask(MysqlImportTask): + + description = "TestActivityDescription import" + + outer_sql= """ + SELECT * FROM testActivityDescription + """ + + option = "tad" + + target_classes = [TestActivityDescription] + + main_class = TestActivityDescription + + depends_on = [BibliographyImportTask, PpiImportTask] + + def migrate_row(self, row): + tad = TestActivityDescription() + tad.id = row[0] + cursor_biblio = self.get_cursor() + cursor_biblio.execute( + """select IDSource from biblio where IDBiblio={}""".format(row[2])) + biblio_row = cursor_biblio.fetchone() + biblio = Bibliography.objects.get(id_source=biblio_row[0]) + tad.biblio = biblio + tad.protein_domain_bound_complex = ProteinDomainBoundComplex.objects.get(id=row[1]) + tad.ppi = Ppi.objects.get(id=row[3]) + tad.test_name = row[4] + tad.is_primary = row[5]=='Y' + if row[6]=='Unspecified': + tad.protein_bound_construct = 'U' + elif row[6]=='Full length': + tad.protein_bound_construct = 'F' + tad.test_type = row[7].upper() + tad.test_modulation_type = row[8][0] + tad.nb_active_compounds = row[9] + if row[16] is not None: + tad.cell_line, created = CellLine.objects.get_or_create( + name=row[16]) + tad.save() + return tad + +class CompoundActivityResultImportTask(MysqlImportTask): + + description = "CompoundActivityResult import" + + outer_sql= """ + select c.CanonicalSmile, r.IDTestActivity, r.ActivityType, + r.Activity, r.PourcentInhib from cmpdActiveResult as r + inner join compound as c on r.IDCompound = c.IDCompound; + """ + + option = "car" + + target_classes = [CompoundActivityResult] + + main_class = CompoundActivityResult + + depends_on = [TestActivityDescriptionImportTask, CompoundImportTask] + + def migrate_row(self, row): + car = CompoundActivityResult() + car.test_activity_description = TestActivityDescription.objects.get(id=row[1]) + car.compound = Compound.objects.get(canonical_smile=row[0]) + car.activity_type = row[2] + car.activity = row[3] + car.inhibition_percentage = row[4] + car.modulation_type = 'I' # because previous DB is only about inhibitors + car.save() + return car + + +class TestCytotoxDescriptionImportTask(MysqlImportTask): + + description = "TestCytotoxDescription import" + + outer_sql= """ + SELECT * FROM testCytotoxDescription + """ + + option = "tcd" + + target_classes = [TestCytotoxDescription] + + main_class = TestCytotoxDescription + + depends_on = [BibliographyImportTask, CompoundImportTask] + + def migrate_row(self, row): + tcd = TestCytotoxDescription() + tcd.id = row[0] + cursor_biblio = self.get_cursor() + cursor_biblio.execute( + """select IDSource from biblio where IDBiblio={}""".format(row[1])) + biblio_row = cursor_biblio.fetchone() + biblio = Bibliography.objects.get(id_source=biblio_row[0]) + tcd.biblio = biblio + tcd.test_name = row[2] + tcd.compound_concentration = row[4] + if row[3] is not None: + tcd.cell_line, created = CellLine.objects.get_or_create( + name=row[3]) + tcd.save() + return tcd + +class CompoundCytotoxicityResultImportTask(MysqlImportTask): + + description = "CompoundCytotoxicityResult import" + + outer_sql= """ + select c.CanonicalSmile, r.IDTestCytotox, r.IDTestCytotox + from cmpdCytotoxResult as r inner join compound as c + on r.IDCompound = c.IDCompound; + """ + + option = "ccr" + + target_classes = [CompoundCytotoxicityResult] + + main_class = CompoundCytotoxicityResult + + depends_on = [CompoundImportTask, TestCytotoxDescriptionImportTask] + + def migrate_row(self, row): + ccr = CompoundCytotoxicityResult() + ccr.test_cytotoxicity_description = TestCytotoxDescription.objects.get(id=row[1]) + ccr.compound = Compound.objects.get(canonical_smile=row[0]) + ccr.toxicity = row[2] == 'Y' + ccr.save() + return ccr + + +class TestPKDescriptionImportTask(MysqlImportTask): + + description = "TestPKDescription import" + + outer_sql= """ + SELECT * FROM testPKDescription + """ + + option = "tpd" + + target_classes = [TestPKDescription] + + main_class = TestPKDescription + + depends_on = [BibliographyImportTask, CompoundImportTask] + + def migrate_row(self, row): + tpd = TestPKDescription() + tpd.id = row[0] + cursor_biblio = self.get_cursor() + cursor_biblio.execute( + """select IDSource from biblio where IDBiblio={}""".format(row[1])) + biblio_row = cursor_biblio.fetchone() + biblio = Bibliography.objects.get(id_source=biblio_row[0]) + tpd.biblio = biblio + tpd.test_name = row[2] + #tcd.compound_concentration = row[4] + try: + taxonomy = Taxonomy.objects.get(taxonomy_id=10090) + except Taxonomy.DoesNotExist: + taxonomy = Taxonomy() + taxonomy.taxonomy_id = 10090 + # dirty hack: all organisms in this table are "mice", + # hence assuming Mus musculus + taxonomy.save(autofill=True) + tpd.organism = taxonomy + tpd.administration_mode = row[4] + tpd.concentration = row[5] + tpd.dose = row[6] + tpd.dose_interval = row[7] + tpd.save() + return tpd + + +class CompoundPKResultImportTask(MysqlImportTask): + + description = "CompoundPKResult import" + + outer_sql= """ + select c.CanonicalSmile, r.IDTestPK, r.Tolerated, r.AUC, r.Clearance, + r.Cmax, r.OralBioavailability, r.Tdemi, r.Tmax, r.VolDistribution + from cmpdPKResult as r inner join compound as c + on r.IDCompound = c.IDCompound; + """ + + option = "cpr" + + target_classes = [CompoundPKResult] + + main_class = CompoundPKResult + + depends_on = [CompoundImportTask, TestPKDescriptionImportTask] + + def migrate_row(self, row): + cpr = CompoundPKResult() + cpr.test_pk_description = TestPKDescription.objects.get(id=row[1]) + cpr.compound = Compound.objects.get(canonical_smile=row[0]) + cpr.tolerated = row[2] == 'Y' + cpr.auc = row[3] + cpr.clearance = row[4] + cpr.c_max = row[5] + cpr.oral_bioavailability = row[6] + cpr.t_demi = row[7] + cpr.t_max = row[8] + cpr.voldistribution = row[9] + cpr.save() + return cpr class Command(BaseCommand): + help = "Import iPPI-DB data from the MySQL database" - help = "Import data from the local v1 database" + task_classes = [ProteinsImportTask, + BibliographyImportTask, + DomainImportTask, + CompoundImportTask, + SymmetriesImportTask, + PpiImportTask, + RefCompoundBiblioImportTask, + AdditionalCasImportTask, + TestActivityDescriptionImportTask, + CompoundActivityResultImportTask, + TestCytotoxDescriptionImportTask, + CompoundCytotoxicityResultImportTask, + TestPKDescriptionImportTask, + CompoundPKResultImportTask + ] def add_arguments(self, parser): + task_choices = [task_class.option for task_class in self.task_classes] + task_choices.append('all') + task_help = 'Import task to be run.\n ' + task_help += ',\n \n'.join([task_class.option + ': ' + task_class.description for task_class in self.task_classes]) + task_help += ',\n \nall: import everything.' parser.add_argument( - '--bibliographies', - action='store_true', - dest='bibliographies', - default=False, - help='Flush and migrate bibliographies', + 'task', + type=str, + choices=task_choices, + help=task_help, ) parser.add_argument( - '--proteins', + '--all', action='store_true', - dest='proteins', + dest='all', default=False, - help='Flush and migrate proteins', + help='import everything', ) parser.add_argument( - '--domains', + '--wscache', action='store_true', - dest='domains', + dest='wscache', default=False, - help='Flush and migrate domains', + help='use web services cache', ) parser.add_argument( - '--symmetries', + '--errortb', action='store_true', - dest='symmetries', + dest='errortb', default=False, - help='Flush and create symmetries', + help='show tracebacks on errors', ) parser.add_argument( - '--ppi', - action='store_true', - dest='ppi', - default=False, - help='Flush and migrate ppis, complexes, and tests', - ) - parser.add_argument( - '--compound', + '--stoponfail', action='store_true', - dest='compound', + dest='stoponfail', default=False, - help='Flush and migrate compounds', + help='stop on first error', ) parser.add_argument( - '--compound-biblio', + '--progressbar', action='store_true', - dest='compoundbiblio', + dest='progress_bar', default=False, - help='Flush and migrate compounds-bibliography', + help='show progressbar instead of logging inserted entries', ) parser.add_argument( - '--stoponfail', + '--check', action='store_true', - dest='stoponfail', + dest='check', default=False, - help='Stop on fail', + help='only check instead of running import', ) def handle(self, *args, **options): conn = mysql.connector.connect( converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb") - cursor = conn.cursor() - cursor_aux = conn.cursor(buffered=True) - cursor_aux2 = conn.cursor(buffered=True) - if options['bibliographies']: - cursor.execute("""SELECT * FROM biblio""") - rows = cursor.fetchall() - Bibliography.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed bibliography table')) - for row in rows: - try: - b = Bibliography() - if row[1] == 'article': - b.source = 'PM' - else: - b.source = 'PT' - b.id_source = row[2] - b.cytotox = row[6]=='Y' - b.in_silico = row[7]=='Y' - b.in_vitro = row[8]=='Y' - b.in_vivo = row[9]=='Y' - b.in_cellulo = row[10]=='Y' - b.pharmacokinetic = row[11]=='Y' - b.xray = row[12]=='Y' - b.save(autofill=True) - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2]))) - if options['proteins']: - cursor.execute("""SELECT * FROM protein""") - rows = cursor.fetchall() - Protein.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed protein table')) - Taxonomy.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed taxonomy table')) - MolecularFunction.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed molecular function table')) - for row in rows: - try: - p = Protein() - p.uniprot_id = row[1] - p.save(autofill=True) - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2]))) - if options['domains']: - cursor.execute("""SELECT * FROM domain""") - rows = cursor.fetchall() - Domain.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed domain table')) - for row in rows: - try: - p = Domain() - p.pfam_acc = row[2] - p.domain_family = row[4] - p.save(autofill=True) - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2]))) - if options['symmetries']: - Symmetry.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed symmetries table')) - rows = [ - ['AS', 'asymmetric'], - ['C2', 'C2 symmetry'], - ['D2', 'D2 symmetry'], - ['C3', 'C3 symmetry'], - ['D3', 'D3 symmetry'], - ['C4', 'C4 symmetry'], - ['D4', 'D4 symmetry'], - ['C5', 'C5 symmetry'], - ['D5', 'D5 symmetry'], - ] - for row in rows: - try: - symmetry = Symmetry() - symmetry.code = row[0] - symmetry.description = row[1] - symmetry.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[0], row[1])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[0], row[1]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[0], row[1]))) - if options['ppi']: - sql_request_string = ''' -select distinct protein.NumUniprot, domain.PfamNumAccession, complexe.NbCopy, cmpdAction.IDComplexeBound, bindingSiteEvidence.CodePDB, - 'part1', ppi.IDPPI, disease.Disease, complexe.IDComplexe, ppi.Family from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite1) - inner join complexe on (ppi.IDComplexe1=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) - inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) - inner join disease on (disease.IDPPI=ppi.IDPPI) left outer join bindingSiteEvidence on (ppi.IDPPI=bindingSiteEvidence.IDPPI) -union -select distinct protein.NumUniprot, domain.PfamNumAccession , complexe.NbCopy, cmpdAction.IDComplexeBound, null, - 'part2', ppi.IDPPI, disease.Disease, complexe.IDComplexe, ppi.Family from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite2) - inner join complexe on (ppi.IDComplexe2=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) - inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) - inner join disease on (disease.IDPPI=ppi.IDPPI) - ''' - cursor.execute(sql_request_string) - rows = cursor.fetchall() - ProteinDomainBoundComplex.objects.all().delete() - ProteinDomainPartnerComplex.objects.all().delete() - Disease.objects.all().delete() - Ppi.objects.all().delete() - PpiFamily.objects.all().delete() - PpiComplex.objects.all().delete() - CompoundAction.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed protein domain complex, PPI, PPI families, compound actions, and disease tables')) - ppi_ids_mapping = {} - protein_domain_bound_complexes_mapping = {} - for row in rows: - try: - # create or retrieve Ppi object - if row[5] == 'part1': - ppi = Ppi() - disease, created = Disease.objects.get_or_create( - name=row[7]) - ppi_family, created = PpiFamily.objects.get_or_create( - name=row[9].strip()) - ppi.family = ppi_family - ppi.pdb_id = row[4] - ppi.pockets_nb = 1 - ppi.symmetry = Symmetry.objects.get(code='AS') - ppi.save() - ppi.diseases.add(disease) - ppi.save() - ppi_ids_mapping[row[6]] = ppi.id - else: - ppi = Ppi.objects.get(id=ppi_ids_mapping[row[6]]) - # create a complex - if row[3] is None: - c = ProteinDomainPartnerComplex() - else: - c = ProteinDomainBoundComplex() - protein = Protein.objects.get(uniprot_id=row[0]) - c.protein = protein - domain = Domain.objects.get(pfam_acc=row[1]) - c.domain = domain - c.ppc_copy_nb = row[2] - if isinstance(c, ProteinDomainBoundComplex): - c.ppp_copy_nb_per_p = 1 - c.save() - # save ProteinDomainBoundComplex/source ID in 'complexe' table - if row[3] is not None: - protein_domain_bound_complexes_mapping[row[8]] = c - # create the PpiComplex object - ppi_complex = PpiComplex() - ppi_complex.ppi = ppi - ppi_complex.complex = c - ppi_complex.cc_nb = 1 - ppi_complex.save() - if row[3] is not None: - sql_ca_string = '''select distinct c.CanonicalSmile from cmpdAction as ca - inner join compound as c on ca.IDCompound=c.IDCompound where ca.IDComplexeBound='''\ - + str(row[3]) - cursor_aux.execute(sql_ca_string) - rows_aux = cursor_aux.fetchall() - for row_aux in rows_aux: - if row_aux is None: - self.stdout.write(sql_ca_string) - continue - canonical_smile = row_aux[0] - sql_bse_string = '''select CodePDB from bindingSiteEvidence where IDPPI=''' + str(ppi.id) - cursor_aux2.execute(sql_bse_string) - row_aux2 = cursor_aux2.fetchone() - if row_aux2 is None: - pdb_id_ca = None - else: - pdb_id_ca = row_aux2[0] - c = Compound.objects.get(canonical_smile=canonical_smile) - ca = CompoundAction() - ca.compound = c - ca.ppi = ppi - ca.activation_mode = 'U' - ca.nb_copy_compounds = 1 - ca.pdb_id = pdb_id_ca - ca.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[0], row[1])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[0], row[1]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[0], row[1]))) - # add names to PPIs automatically once the PPI complexes which the names - # depend on have been inserted - for ppi in Ppi.objects.all(): - ppi.save(autofill=True) - additional_cas = [ - {'compound_id':15, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':36, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':57, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':69, 'ppi_name': 'IL2 / IL2RA'}, - {'compound_id':78, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':110, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':111, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':120, 'ppi_name': 'XIAP / DBLOH'}, - {'compound_id':124, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':144, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':171, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':213, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':221, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':238, 'ppi_name': 'IL2 / IL2RA'}, - {'compound_id':272, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':305, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':316, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':354, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':355, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':399, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':406, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':409, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':412, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':458, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':463, 'ppi_name': 'IL2 / IL2RA'}, - {'compound_id':465, 'ppi_name': 'XIAP / DBLOH'}, - {'compound_id':469, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':477, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':485, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':487, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':490, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':538, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':563, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':585, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':590, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':626, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':666, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':683, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':702, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':705, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':714, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':734, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':769, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':786, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':801, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':814, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':817, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':886, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':971, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':977, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':999, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1011, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1026, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1031, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1055, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1056, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1069, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1115, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1122, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1126, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1153, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1155, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1160, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1202, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1221, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1251, 'ppi_name': 'XIAP / DBLOH'}, - {'compound_id':1268, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1280, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1296, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1297, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1310, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1313, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1317, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1376, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1398, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1402, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1403, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1407, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1434, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1453, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1477, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1481, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1504, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1514, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1553, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1594, 'ppi_name': 'BCL2 / BAK'}, - {'compound_id':1648, 'ppi_name': 'MDM2 / P53'}, - {'compound_id':1649, 'ppi_name': 'MDM2 / P53'}, - ] - for ca_props in additional_cas: - try: - ca = CompoundAction() - ca.compound = Compound.objects.get(id=ca_props['compound_id']) - ca.ppi = Ppi.objects.get(name=ca_props['ppi_name']) - ca.activation_mode = 'U' - ca.nb_copy_compounds = 1 - ca.save() - self.stdout.write( - self.style.SUCCESS('Successfully inserted CompoundAction {}'.format(ca))) - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting CompoundAction for compound {} and ppi {}'.format(ca_props['compound_id'], ca_props['ppi_name'])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting CompoundAction for compound {} and ppi {}'.format(ca_props['compound_id'], ca_props['ppi_name']))) - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting CompoundAction for compound {} and ppi {}'.format(ca_props['compound_id'], ca_props['ppi_name'])) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted CompoundAction {}'.format(ca))) - CellLine.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed cell lines table')) - cursor.execute("""SELECT * FROM testActivityDescription""") - rows = cursor.fetchall() - TestActivityDescription.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed test activity descriptions table')) - tad_id_mapping = {} - for row in rows: - try: - tad = TestActivityDescription() - cursor.execute( - """select IDSource from biblio where IDBiblio={}""".format(row[2])) - biblio_row = cursor.fetchone() - biblio = Bibliography.objects.get(id_source=biblio_row[0]) - tad.biblio = biblio - tad.protein_domain_bound_complex = protein_domain_bound_complexes_mapping[row[1]] - tad.ppi = Ppi.objects.get(id=ppi_ids_mapping[row[3]]) - tad.test_name = row[4] - tad.is_primary = row[5]=='Y' - if row[6]=='Unspecified': - tad.protein_bound_construct = 'U' - elif row[6]=='Full length': - tad.protein_bound_construct = 'F' - tad.test_type = row[7].upper() - tad.test_modulation_type = row[8][0] - tad.nb_active_compounds = row[9] - if row[16] is not None: - tad.cell_line, created = CellLine.objects.get_or_create( - name=row[16]) - tad.save() - tad_id_mapping[row[0]] = tad.id - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(row[2]))) - cursor.execute("""select c.CanonicalSmile, r.IDTestActivity, r.ActivityType, r.Activity, r.PourcentInhib from cmpdActiveResult as r inner join compound as c on r.IDCompound = c.IDCompound;""") - rows = cursor.fetchall() - CompoundActivityResult.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed compound activity result table')) - for row in rows: - try: - car = CompoundActivityResult() - car.test_activity_description = TestActivityDescription.objects.get(id=tad_id_mapping[row[1]]) - car.compound = Compound.objects.get(canonical_smile=row[0]) - car.activity_type = row[2] - car.activity = row[3] - car.inhibition_percentage = row[4] - car.modulation_type = 'I' # because previous DB is only about inhibitors - car.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(str(car)))) - cursor.execute("""SELECT * FROM testCytotoxDescription""") - rows = cursor.fetchall() - TestCytotoxDescription.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed test cytotoxicity descriptions table')) - tcd_id_mapping = {} - for row in rows: - try: - tcd = TestCytotoxDescription() - cursor.execute( - """select IDSource from biblio where IDBiblio={}""".format(row[1])) - biblio_row = cursor.fetchone() - biblio = Bibliography.objects.get(id_source=biblio_row[0]) - tcd.biblio = biblio - tcd.test_name = row[2] - tcd.compound_concentration = row[4] - if row[3] is not None: - tcd.cell_line, created = CellLine.objects.get_or_create( - name=row[3]) - tcd.save() - tcd_id_mapping[row[0]] = tcd.id - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(row[2]))) - cursor.execute("""select c.CanonicalSmile, r.IDTestCytotox, r.IDTestCytotox from cmpdCytotoxResult as r inner join compound as c on r.IDCompound = c.IDCompound;""") - rows = cursor.fetchall() - CompoundCytotoxicityResult.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed compound cytotoxicity result table')) - for row in rows: - try: - ccr = CompoundCytotoxicityResult() - ccr.test_cytotoxicity_description = TestCytotoxDescription.objects.get(id=tcd_id_mapping[row[1]]) - ccr.compound = Compound.objects.get(canonical_smile=row[0]) - ccr.toxicity = row[2] == 'Y' - ccr.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - import traceback - self.stderr.write(traceback.format_exc()) - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(str(ccr)))) - cursor.execute("""SELECT * FROM testPKDescription""") - rows = cursor.fetchall() - TestPKDescription.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed test PK descriptions table')) - tpd_id_mapping = {} - for row in rows: - try: - tpd = TestPKDescription() - cursor.execute( - """select IDSource from biblio where IDBiblio={}""".format(row[1])) - biblio_row = cursor.fetchone() - biblio = Bibliography.objects.get(id_source=biblio_row[0]) - tpd.biblio = biblio - tpd.test_name = row[2] - #tcd.compound_concentration = row[4] - try: - taxonomy = Taxonomy.objects.get(taxonomy_id=10090) - except Taxonomy.DoesNotExist: - taxonomy = Taxonomy() - taxonomy.taxonomy_id = 10090 - # dirty hack: all organisms in this table are "mice", - # hence assuming Mus musculus - taxonomy.save(autofill=True) - tpd.organism = taxonomy - tpd.administration_mode = row[4] - tpd.concentration = row[5] - tpd.dose = row[6] - tpd.dose_interval = row[7] - tpd.save() - tpd_id_mapping[row[0]] = tpd.id - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(row[2]))) - cursor.execute("""select c.CanonicalSmile, r.IDTestPK, r.Tolerated, r.AUC, r.Clearance, r.Cmax, r.OralBioavailability, r.Tdemi, r.Tmax, r.VolDistribution from cmpdPKResult as r inner join compound as c on r.IDCompound = c.IDCompound;""") - rows = cursor.fetchall() - CompoundPKResult.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed compound cytotoxicity PK table')) - for row in rows: - try: - cpr = CompoundPKResult() - cpr.test_pk_description = TestPKDescription.objects.get(id=tpd_id_mapping[row[1]]) - cpr.compound = Compound.objects.get(canonical_smile=row[0]) - cpr.tolerated = row[2] == 'Y' - cpr.auc = row[3] - cpr.clearance = row[4] - cpr.c_max = row[5] - cpr.oral_bioavailability = row[6] - cpr.t_demi = row[7] - cpr.t_max = row[8] - cpr.voldistribution = row[9] - cpr.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - import traceback - self.stderr.write(traceback.format_exc()) - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(str(cpr)))) - if options['compound']: - cursor.execute("""SELECT * FROM compound""") - rows = cursor.fetchall() - Compound.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed compound table')) - for row in rows: - try: - compound = Compound() - compound.id = row[0] - compound.canonical_smile = row[1] - compound.is_macrocycle = (row[4] == 'Y') - compound.aromatic_ratio = row[5] - compound.balaban_index = row[6] - compound.fsp3 = row[7] # Csp3Ratio - compound.gc_molar_refractivity = row[ - 10] # GCMolarRefractivity - compound.log_d = row[13] # LogD - compound.a_log_p = row[14] # ALogP - compound.mean_atom_vol_vdw = row[15] # MeanAtomVolVdW - compound.molecular_weight = row[16] # MolecularWeight - compound.nb_acceptor_h = row[17] # NbAcceptorH - compound.nb_aliphatic_amines = row[ - 18] # NbAliphaticsAmines - compound.nb_aromatic_bonds = row[19] # NbAromaticBonds - compound.nb_aromatic_ether = row[20] # NbAromaticsEther - compound.nb_aromatic_sssr = row[21] # NbAromaticsSSSR - compound.nb_atom = row[22] # NbAtom - compound.nb_atom_non_h = row[23] # NbAtomNonH - compound.nb_benzene_like_rings = row[24] # NbBenzLikeRings - compound.nb_bonds = row[25] # NbBonds - compound.nb_bonds_non_h = row[26] # NbBondsNonH - compound.nb_br = row[27] # NbBr - compound.nb_c = row[28] # NbC - compound.nb_chiral_centers = row[29] # NbChiralCenters - compound.nb_circuits = row[30] # NbCircuits - compound.nb_cl = row[31] # NbCl - compound.nb_csp2 = row[32] # NbCsp2 - compound.nb_csp3 = row[33] # NbCsp3 - compound.nb_donor_h = row[34] # NbDonorH - compound.nb_double_bonds = row[35] # NbDoubleBonds - compound.nb_f = row[36] # NbF - compound.nb_i = row[37] # NbI - compound.nb_multiple_bonds = row[38] # NbMultBonds - compound.nb_n = row[39] # NbN - compound.nb_o = row[40] # NbO - compound.nb_rings = row[41] # NbRings - compound.nb_rotatable_bonds = row[42] # NbRotatableBonds - compound.randic_index = row[44] # RandicIndex - compound.rdf070m = row[45] # RDF070m - compound.rotatable_bond_fraction = row[ - 46] # RotatableBondFraction - compound.sum_atom_polar = row[47] # SumAtomPolar - compound.sum_atom_vol_vdw = row[48] # SumAtomVolVdW - compound.tpsa = row[51] # TPSA - compound.ui = row[52] # Ui - compound.wiener_index = row[54] # WienerIndex - if row[55] != 'N': - compound.common_name = row[55] # CmpdNameSh - compound.pubchem_id = row[56] # IdPubchem - if row[57] != 'N': - compound.chemspider_id = row[57] # IdPubchem - compound.chembl_id = row[58] - compound.iupac_name = row[59] - compound.save(autofill=True) - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2]))) - if options['compoundbiblio']: - cursor.execute("""select r.CmpdNameInBiblio, c.CanonicalSmile, b.IDSource from refCmpdBiblio as r inner join compound as c on r.IDCompound=c.IDCompound inner join biblio as b on r.IDBiblio=b.IDBiblio;""") - rows = cursor.fetchall() - RefCompoundBiblio.objects.all().delete() - self.stdout.write( - self.style.SUCCESS('Successfully flushed compound-biblio table')) - for row in rows: - try: - c = Compound.objects.get(canonical_smile=row[1]) - b = Bibliography.objects.get(id_source=row[2]) - r = RefCompoundBiblio() - r.compound_id = c.id - r.bibliography_id = b.id - r.compound_name = '-'.join(re.split('[-.]',row[0])[1:]) - # mysql format for this field is [PMID/WIPOID]-name: we remove the first part - # sometimes there is a . instead of the dash - # sometimes we have more than one - - r.save() - except Exception as e: - if options['stoponfail']: - import traceback - self.stderr.write(traceback.format_exc()) - raise CommandError( - 'Failed inserting {} {}'.format(row[1], row[2])) - else: - self.stdout.write( - self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2]))) - else: - self.stdout.write( - self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2]))) + if options.get('wscache'): + requests_cache.install_cache('ws_cache') + task_option = options.get('task') + # map task names to task classes + option_to_task = {task_class.option: task_class for task_class in self.task_classes} + # map task names to the list of task names that depend upon them + dependency_to_task = {task_class.option: [] for task_class in self.task_classes} + for task_class in self.task_classes: + for task_depency_class in task_class.depends_on: + dependency_to_task[task_depency_class.option].append(task_class.option) + dependency_to_task['all'] = option_to_task.keys() + def append_dependencies(option): + dependencies = set([option]) + for depending_option in dependency_to_task.get(option,[]): + dependencies.update(append_dependencies(depending_option)) + return dependencies + if task_option: + # list of tasks to run because they depend on initial option, + # directly or not + dependencies = append_dependencies(task_option) + print(dependencies) + dependencies_classes = {task_class for opt, task_class in option_to_task.items() if opt in dependencies} + conn = mysql.connector.connect( + converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb") + while len(dependencies_classes)>0: + for task_class in copy.copy(dependencies_classes): + if not(set(task_class.depends_on) & dependencies_classes): + task = task_class(self, options['errortb'], options['stoponfail'], options['progress_bar']) + if hasattr(task, 'set_mysql_conn'): + task.set_mysql_conn(conn) + task.run() + dependencies_classes.remove(task_class) diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py index 7e6f9f2a401d03c0d8f7739eb00cdc698bce895b..5fa8acdd5362703f5fc156f3919ec608f5b424db 100644 --- a/ippisite/ippidb/models.py +++ b/ippisite/ippidb/models.py @@ -1205,6 +1205,9 @@ class RefCompoundBiblio(models.Model): class Meta: unique_together = (('compound', 'bibliography'),) + def __str__(self): + return 'Ref. {} on {}'.format(self.bibliography, self.compound) + class DrugBankCompound(models.Model): """