import_v1_data.py 19.5 KB
Newer Older
1
2
3
import glob

from django.utils import timezone
4
from django.core.management import BaseCommand, CommandError
5
import mysql.connector
6
from pybel import readfile
7

8
from ippidb.models import Bibliography, Protein, Taxonomy, MolecularFunction, \
9
    Domain, ProteinDomainBoundComplex, ProteinDomainPartnerComplex, Symmetry, Ppi, PpiComplex, Disease, \
10
    Compound, MDDRCompoundImport, MDDRActivityClass, TestActivityDescription, CellLine
11

Hervé  MENAGER's avatar
Hervé MENAGER committed
12

13
14
15
16
17
18
19
20
21
22
23
24
class MyConverter(mysql.connector.conversion.MySQLConverter):

    def row_to_python(self, row, fields):
        row = super(MyConverter, self).row_to_python(row, fields)

        def to_unicode(col):
            if type(col) == bytearray:
                return col.decode('utf-8')
            return col

        return[to_unicode(col) for col in row]

Hervé  MENAGER's avatar
Hervé MENAGER committed
25

26
class Command(BaseCommand):
27

28
29
    help = "Import data from the local v1 database"

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
    def add_arguments(self, parser):
        parser.add_argument(
            '--bibliographies',
            action='store_true',
            dest='bibliographies',
            default=False,
            help='Flush and migrate bibliographies',
        )
        parser.add_argument(
            '--proteins',
            action='store_true',
            dest='proteins',
            default=False,
            help='Flush and migrate proteins',
        )
45
46
47
48
49
50
51
        parser.add_argument(
            '--domains',
            action='store_true',
            dest='domains',
            default=False,
            help='Flush and migrate domains',
        )
52
        parser.add_argument(
53
            '--symmetries',
54
            action='store_true',
55
            dest='symmetries',
56
            default=False,
57
            help='Flush and create symmetries',
58
        )
59
        parser.add_argument(
60
            '--ppi',
61
            action='store_true',
62
            dest='ppi',
63
            default=False,
64
            help='Flush and migrate ppis and complexes',
65
        )
66
67
68
69
70
71
72
        parser.add_argument(
            '--mddr',
            action='store_true',
            dest='mddr',
            default=False,
            help='Flush and import MDDR database',
        )
73
74
75
76
77
78
79
        parser.add_argument(
            '--compound',
            action='store_true',
            dest='compound',
            default=False,
            help='Flush and migrate compounds',
        )
80
81
82
83
84
85
86
        parser.add_argument(
            '--test-activity-description',
            action='store_true',
            dest='test-activity-description',
            default=False,
            help='Flush and migrate test activity descriptions',
        )
87
88
89
90
91
92
93
94
        parser.add_argument(
            '--stoponfail',
            action='store_true',
            dest='stoponfail',
            default=False,
            help='Stop on fail',
        )

95
    def handle(self, *args, **options):
Hervé  MENAGER's avatar
Hervé MENAGER committed
96
97
        conn = mysql.connector.connect(
            converter_class=MyConverter, host="localhost", user="root", password="ippidb", database="ippidb")
98
        cursor = conn.cursor()
99
100
101
102
        if options['bibliographies']:
            cursor.execute("""SELECT * FROM biblio""")
            rows = cursor.fetchall()
            Bibliography.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
103
104
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed bibliography table'))
105
106
107
            for row in rows:
                try:
                    b = Bibliography()
Hervé  MENAGER's avatar
Hervé MENAGER committed
108
                    if row[1] == 'article':
109
110
111
112
                        b.source = 'PM'
                    else:
                        b.source = 'PT'
                    b.id_source = row[2]
113
                    b.save(autofill=True)
114
                except Exception as e:
Hervé  MENAGER's avatar
Hervé MENAGER committed
115
116
                    self.stdout.write(
                        self.style.ERROR('Failed inserting {}'.format(row[2])))
117
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
118
119
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {}'.format(row[2])))
120
121
122
123
        if options['proteins']:
            cursor.execute("""SELECT * FROM protein""")
            rows = cursor.fetchall()
            Protein.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
124
125
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed protein table'))
126
            Taxonomy.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
127
128
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed taxonomy table'))
129
            MolecularFunction.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
130
131
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed molecular function table'))
132
133
134
135
            for row in rows:
                try:
                    p = Protein()
                    p.uniprot_id = row[1]
136
                    p.save(autofill=True)
137
                except Exception as e:
138
139
140
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
141
142
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[1], row[2]))
143
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
144
145
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2])))
146
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
147
148
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2])))
149
150
151
152
        if options['domains']:
            cursor.execute("""SELECT * FROM domain""")
            rows = cursor.fetchall()
            Domain.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
153
154
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed domain table'))
155
156
157
158
159
            for row in rows:
                try:
                    p = Domain()
                    p.pfam_acc = row[2]
                    p.domain_family = row[4]
160
                    p.save(autofill=True)
161
162
163
164
                except Exception as e:
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
165
166
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[1], row[2]))
167
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
168
169
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2])))
170
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
171
172
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2])))
173
174
        if options['symmetries']:
            Symmetry.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
175
176
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed symmetries table'))
177
            rows = [
Hervé  MENAGER's avatar
Hervé MENAGER committed
178
                ['AS', 'asymmetric'],
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
                    ['C2', 'C2 symmetry'],
                    ['D2', 'D2 symmetry'],
                    ['C3', 'C3 symmetry'],
                    ['D3', 'D3 symmetry'],
                    ['C4', 'C4 symmetry'],
                    ['D4', 'D4 symmetry'],
                    ['C5', 'C5 symmetry'],
                    ['D5', 'D5 symmetry'],
            ]
            for row in rows:
                try:
                    symmetry = Symmetry()
                    symmetry.code = row[0]
                    symmetry.description = row[1]
                    symmetry.save()
                except Exception as e:
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
198
199
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[0], row[1]))
200
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
201
202
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[0], row[1])))
203
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
204
205
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {} {}'.format(row[0], row[1])))
206
207
        if options['ppi']:
            sql_request_string = '''
208
select distinct protein.NumUniprot, domain.PfamNumAccession, complexe.NbCopy, cmpdAction.IDComplexeBound, bindingSiteEvidence.CodePDB, 'part1', ppi.IDPPI, disease.Disease  from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite1) inner join complexe on (ppi.IDComplexe1=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) inner join disease on (disease.IDPPI=ppi.IDPPI) left outer join bindingSiteEvidence on (ppi.IDPPI=bindingSiteEvidence.IDPPI)
Hervé  MENAGER's avatar
Hervé MENAGER committed
209
union
210
select distinct protein.NumUniprot, domain.PfamNumAccession  , complexe.NbCopy, cmpdAction.IDComplexeBound, null, 'part2', ppi.IDPPI, disease.Disease  from bindingSite inner join ppi on (bindingSite.IDBindingSite=ppi.IDBindingSite2) inner join complexe on (ppi.IDComplexe2=complexe.IDComplexe) left outer join cmpdAction on (complexe.IDComplexe=cmpdAction.IDComplexeBound) inner join protein on (bindingSite.IDProtein=protein.IDProtein) inner join domain on (bindingSite.IDDomain=domain.IDDomain) inner join disease on (disease.IDPPI=ppi.IDPPI)'''
211
212
213
214
            cursor.execute(sql_request_string)
            rows = cursor.fetchall()
            ProteinDomainBoundComplex.objects.all().delete()
            ProteinDomainPartnerComplex.objects.all().delete()
215
            Disease.objects.all().delete()
216
217
            Ppi.objects.all().delete()
            PpiComplex.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
218
219
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed protein domain complex, PPI, and disease tables'))
220
221
222
223
            ppi_ids_mapping = {}
            for row in rows:
                try:
                    # create or retrieve Ppi object
Hervé  MENAGER's avatar
Hervé MENAGER committed
224
                    if row[5] == 'part1':
225
                        ppi = Ppi()
Hervé  MENAGER's avatar
Hervé MENAGER committed
226
227
                        disease, created = Disease.objects.get_or_create(
                            name=row[7])
228
                        ppi.pdb_id = row[4]
229
                        ppi.pockets_nb = 1
230
231
                        ppi.symmetry = Symmetry.objects.get(code='AS')
                        ppi.save()
232
233
                        ppi.diseases.add(disease)
                        ppi.save()
Hervé  MENAGER's avatar
Hervé MENAGER committed
234
                        ppi_ids_mapping[row[6]] = ppi.id
235
236
237
                    else:
                        ppi = Ppi.objects.get(id=ppi_ids_mapping[row[6]])
                    # create a complex
238
                    if row[3] is None:
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
                        c = ProteinDomainPartnerComplex()
                    else:
                        c = ProteinDomainBoundComplex()
                    protein = Protein.objects.get(uniprot_id=row[0])
                    c.protein = protein
                    domain = Domain.objects.get(pfam_acc=row[1])
                    c.domain = domain
                    c.ppc_copy_nb = row[2]
                    if isinstance(c, ProteinDomainBoundComplex):
                        c.ppp_copy_nb_per_p = 1
                    c.save()
                    # create the PpiComplex object
                    ppi_complex = PpiComplex()
                    ppi_complex.ppi = ppi
                    ppi_complex.complex = c
                    ppi_complex.cc_nb = 1
                    ppi_complex.save()
                except Exception as e:
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
260
261
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[0], row[1]))
262
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
263
264
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[0], row[1])))
265
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
266
267
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {} {}'.format(row[0], row[1])))
268
269
270
271
            cursor.execute("""SELECT * FROM testActivityDescription""")
            rows = cursor.fetchall()
            TestActivityDescription.objects.all().delete()
            CellLine.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
272
273
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed test activity descriptions table and cell lines table'))
274
275
276
            for row in rows:
                try:
                    tad = TestActivityDescription()
Hervé  MENAGER's avatar
Hervé MENAGER committed
277
278
                    cursor.execute(
                        """select IDSource from biblio where IDBiblio={}""".format(row[2]))
279
280
281
                    biblio_row = cursor.fetchone()
                    biblio = Bibliography.objects.get(id_source=biblio_row[0])
                    tad.biblio = biblio
282
                    tad.ppi = Ppi.objects.get(id=ppi_ids_mapping[row[3]])
283
284
285
286
287
                    tad.test_name = row[4]
                    tad.test_type = row[7].upper()
                    tad.test_modulation_type = row[8][0]
                    tad.nb_active_compounds = row[9]
                    if row[16] is not None:
Hervé  MENAGER's avatar
Hervé MENAGER committed
288
289
                        tad.cell_line, created = CellLine.objects.get_or_create(
                            name=row[16])
290
291
292
293
294
                    tad.save()
                except Exception as e:
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
295
296
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[1], row[2]))
297
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
298
299
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2])))
300
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
301
302
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {}'.format(row[2])))
303
304
305
        if options['mddr']:
            MDDRCompoundImport.objects.all().delete()
            MDDRActivityClass.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
306
307
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed MDDR Compound and Activity class tables'))
308
309
310
311
312
313
314
315
316
317
            for sdf_file in glob.glob('/home/hmenager/iPPIDB/mddr20151_2d.sdf/*.sdf'):
                for item in readfile("sdf", sdf_file):
                    try:
                        m = MDDRCompoundImport()
                        m.mddr_name = item.data['MOLNAME']
                        m.canonical_smile = str(item)
                        m.dvpmt_phase = item.data['PHASE']
                        m.db_import_date = timezone.now()
                        m.save()
                        for activity_class_name in item.data['ACTIV_CLASS'].split(','):
Hervé  MENAGER's avatar
Hervé MENAGER committed
318
319
                            activity_class, created = MDDRActivityClass.objects.get_or_create(
                                name=activity_class_name)
320
321
322
323
324
325
                            m.activity_classes.add(activity_class)
                        m.save()
                    except Exception as e:
                        if options['stoponfail']:
                            import traceback
                            self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
326
327
                            raise CommandError(
                                'Failed inserting {}'.format(str(item)))
328
                        else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
329
330
                            self.stdout.write(
                                self.style.ERROR('Failed inserting {}'.format(str(item))))
331
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
332
333
                        self.stdout.write(
                            self.style.SUCCESS('Successfully inserted {} {}'.format(item.data['MOLNAME'], str(item))))
334
335
336
337
        if options['compound']:
            cursor.execute("""SELECT * FROM compound""")
            rows = cursor.fetchall()
            Compound.objects.all().delete()
Hervé  MENAGER's avatar
Hervé MENAGER committed
338
339
            self.stdout.write(
                self.style.SUCCESS('Successfully flushed compound table'))
340
341
342
343
            for row in rows:
                try:
                    compound = Compound()
                    compound.canonical_smile = row[1]
Hervé  MENAGER's avatar
Hervé MENAGER committed
344
                    compound.is_macrocycle = (row[4] == 'Y')
345
346
                    compound.aromatic_ratio = row[5]
                    compound.balaban_index = row[6]
Hervé  MENAGER's avatar
Hervé MENAGER committed
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
                    compound.fsp3 = row[7]  # Csp3Ratio
                    compound.gc_molar_refractivity = row[
                        10]  # GCMolarRefractivity
                    compound.log_d = row[13]  # LogD
                    compound.a_log_p = row[14]  # ALogP
                    compound.mean_atom_vol_vdw = row[15]  # MeanAtomVolVdW
                    compound.molecular_weight = row[16]  # MolecularWeight
                    compound.nb_acceptor_h = row[17]  # NbAcceptorH
                    compound.nb_aliphatic_amines = row[
                        18]  # NbAliphaticsAmines
                    compound.nb_aromatic_bonds = row[19]  # NbAromaticBonds
                    compound.nb_aromatic_ether = row[20]  # NbAromaticsEther
                    compound.nb_aromatic_sssr = row[21]  # NbAromaticsSSSR
                    compound.nb_atom = row[22]  # NbAtom
                    compound.nb_atom_non_h = row[23]  # NbAtomNonH
                    compound.nb_benzene_like_rings = row[24]  # NbBenzLikeRings
                    compound.nb_bonds = row[25]  # NbBonds
                    compound.nb_bonds_non_h = row[26]  # NbBondsNonH
                    compound.nb_br = row[27]  # NbBr
                    compound.nb_c = row[28]  # NbC
                    compound.nb_chiral_centers = row[29]  # NbChiralCenters
                    compound.nb_circuits = row[30]  # NbCircuits
                    compound.nb_cl = row[31]  # NbCl
                    compound.nb_csp2 = row[32]  # NbCsp2
                    compound.nb_csp3 = row[33]  # NbCsp3
                    compound.nb_donor_h = row[34]  # NbDonorH
                    compound.nb_double_bonds = row[35]  # NbDoubleBonds
                    compound.nb_f = row[36]  # NbF
                    compound.nb_i = row[37]  # NbI
                    compound.nb_multiple_bonds = row[38]  # NbMultBonds
                    compound.nb_n = row[39]  # NbN
                    compound.nb_o = row[40]  # NbO
                    compound.nb_rings = row[41]  # NbRings
                    compound.nb_rotatable_bonds = row[42]  # NbRotatableBonds
                    compound.randic_index = row[44]  # RandicIndex
                    compound.rdf070m = row[45]  # RDF070m
                    compound.rotatable_bond_fraction = row[
                        46]  # RotatableBondFraction
                    compound.sum_atom_polar = row[47]  # SumAtomPolar
                    compound.sum_atom_vol_vdw = row[48]  # SumAtomVolVdW
                    compound.tpsa = row[51]  # TPSA
                    compound.ui = row[52]  # Ui
                    compound.wiener_index = row[54]  # WienerIndex
                    if row[55] != 'N':
                        compound.common_name = row[55]  # CmpdNameSh
                    compound.pubchem_id = row[56]  # IdPubchem
                    if row[57] != 'N':
                        compound.chemspider_id = row[57]  # IdPubchem
395
396
                    compound.chembl_id = row[58]
                    compound.iupac_name = row[59]
Hervé  MENAGER's avatar
Hervé MENAGER committed
397
398
                    # compound.mddr_compound = row[60]
                    # FIXME lookup MDDR to get the right ID
399
400
401
402
403
                    compound.save()
                except Exception as e:
                    if options['stoponfail']:
                        import traceback
                        self.stderr.write(traceback.format_exc())
Hervé  MENAGER's avatar
Hervé MENAGER committed
404
405
                        raise CommandError(
                            'Failed inserting {} {}'.format(row[1], row[2]))
406
                    else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
407
408
                        self.stdout.write(
                            self.style.ERROR('Failed inserting {} {}'.format(row[1], row[2])))
409
                else:
Hervé  MENAGER's avatar
Hervé MENAGER committed
410
411
                    self.stdout.write(
                        self.style.SUCCESS('Successfully inserted {} {}'.format(row[1], row[2])))