models.py 51 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
from django.db.models import FloatField, IntegerField, BooleanField
15
from django.db.models import Max, Count, F, Q, Case, When, Subquery, OuterRef
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, \
    get_taxonomy_info, get_go_info, get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
class AutoFillableModel(models.Model):
    """
    Abstract base model whose fields can be filled automatically from
    external sources by the autofill() method.

    save() accepts an extra ``autofill`` keyword argument: if it is set to
    True, or if is_autofill_done() reports that the instance has not been
    filled yet, save() calls autofill() before saving and
    autofill_post_save() after saving. Otherwise neither hook is called.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running the autofill hooks around the save
        when requested or needed.

        The ``autofill`` keyword argument is consumed here and is never
        forwarded to the parent save().
        """
        # is_autofill_done() is evaluated first, exactly once per save
        not_filled_yet = not self.is_autofill_done()
        explicitly_requested = kwargs.pop('autofill', None) is True
        run_hooks = explicitly_requested or not_filled_yet
        if run_hooks:
            self.autofill()
        super().save(*args, **kwargs)
        if run_hooks:
            self.autofill_post_save()

    def autofill(self):
        """
        fill the fields of the instance, must be implemented by subclasses
        """
        raise NotImplementedError()

    def autofill_post_save(self):
        """
        hook called once the save is done, useful for setting m2m
        relations; does nothing by default

        :return:
        """
        pass

    def is_autofill_done(self):
        """
        tell whether the instance is already filled; the default answer
        is True, so autofill only runs when explicitly requested
        """
        return True

61
62

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)

    A reference is identified by a source type (``source``) and an
    identifier within that source (``id_source``); title, journal name,
    authors and year are filled by autofill().
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI'),
    )
    # one compiled pattern per source code, used by validate_source_id()
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # raw string, and the dot after "10" is escaped so that only a
        # literal "10." prefix is accepted
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES,
        default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.TextField('Title')
    journal_name = models.TextField('Journal name', null=True, blank=True)
    authors_list = models.TextField('Authors list')
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI, depending on the source)

        :raises NotImplementedError: if the source is not one of
            'PM', 'PT' or 'DO'
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        the reference is considered filled as soon as it has a title
        """
        # truthiness also covers a title explicitly set to None, which
        # len() would reject with a TypeError
        return bool(self.title)

    def clean(self):
        """
        check that id_source is well-formed for the selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        tell whether get_external_url() returns an URL for this source
        """
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        return the URL of the reference on the PubMed or DOI web site,
        or None for the other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        check an identifier against the pattern registered for a source

        :param id_source: the identifier to check
        :param source: the source code, a key of id_source_validators
        :return: True when the identifier matches
        :raises ValidationError: on the ``source`` field if no pattern is
            registered for the source, on the ``id_source`` field if the
            identifier does not match the pattern
        """
        id_source_validator = Bibliography.id_source_validators.get(source)
        if id_source_validator is None:
            # report an unknown source as a validation error rather than
            # letting a KeyError escape from clean()
            raise ValidationError(
                dict(source=_("Unknown bibliographic source"))
            )
        if not id_source_validator.match(id_source):
            # the message is a constant string so that it can be looked up
            # in the translation catalog; the pattern is passed as a
            # parameter and interpolated when the error is rendered
            raise ValidationError(
                dict(
                    id_source=ValidationError(
                        _("Must match pattern %(pattern)s"
                          " for this selected source"),
                        params={'pattern': id_source_validator.pattern},
                    )
                )
            )
        return True

    def data_and_study(self):
        """
        return the comma separated, title-cased verbose names of the
        data/study flags that are set on this reference
        """
        flag_names = [
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        ]
        return ", ".join(
            self._meta.get_field(f).verbose_name.title()
            for f in flag_names
            if getattr(self, f, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)

    def get_absolute_url(self):
        return reverse('biblio-view', kwargs={'biblio_pk': self.pk})

Hervé  MENAGER's avatar
Hervé MENAGER committed
166

167
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy identifier, stored together with the matching
    human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        set the organism name from the 'scientific_name' entry returned
        by get_taxonomy_info() for this taxonomy id
        """
        taxonomy_info = get_taxonomy_info(self.taxonomy_id)
        self.name = taxonomy_info['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
189

Hervé  MENAGER's avatar
Hervé MENAGER committed
190

191
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function, stored together with the matching
    human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(
        verbose_name='description',
        max_length=500,
    )

    def autofill(self):
        """
        set the description from the 'label' entry returned by
        get_go_info() for this GO id
        """
        go_info = get_go_info(self.go_id)
        self.description = go_info['label']

    def is_autofill_done(self):
        """
        the function is considered filled once its description is a
        non-empty value
        """
        if self.description is None:
            return False
        return len(self.description) > 0

    @property
    def name(self):
        """
        GO id and description, separated by a space
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
218

219
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name

    All fields but ``uniprot_id`` are filled by autofill(); the many to
    many relations are filled by autofill_post_save().
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create the Taxonomy if needed

        The fetched information is kept on the instance so that
        autofill_post_save() can create the molecular functions and
        domains once the protein itself has been saved.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']

        gene_names = info['gene_names']
        # prefer the primary gene name; otherwise fall back on whatever
        # name comes first, and on an empty name when the entry has no
        # gene name at all (indexing an empty list would raise IndexError)
        primary_name = next(
            (gene_name["name"] for gene_name in gene_names
             if gene_name["type"] == "primary"),
            None,
        )
        if primary_name is not None:
            self.gene_name = primary_name
        elif gene_names:
            self.gene_name = gene_names[0]['name']
        else:
            self.gene_name = ''

        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        # get_or_create() is not used here: a new Taxonomy has to be saved
        # with autofill=True for its name to be fetched
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # kept for autofill_post_save()
        self.__info = info

    def autofill_post_save(self):
        """
        link the protein to its molecular functions and domains, creating
        them if needed, from the information stored by autofill()
        """
        info = self.__info
        for go_id in info['molecular_functions']:
            mol_function, _created = MolecularFunction.objects.get_or_create(
                go_id=go_id)
            self.molecular_functions.add(mol_function)

        for domain_id in info['domains']:
            domain, _created = Domain.objects.get_or_create(
                pfam_acc=domain_id)
            self.domains.add(domain)

    def is_autofill_done(self):
        """
        the protein is considered filled as soon as it has a gene name
        """
        # truthiness also covers a gene name explicitly set to None, which
        # len() would reject with a TypeError
        return bool(self.gene_name)

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
280

281
class Domain(AutoFillableModel):
    """
    Protein domain, described with the information coming from PFAM
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        verbose_name='Pfam Description',
        max_length=100,
    )
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    # TODO: what is this field? check database
    # contents

    def autofill(self):
        """
        set the Pfam identifier and description from the 'id' and
        'description' entries returned by get_pfam_info() for this
        accession
        """
        pfam_info = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_info['id']
        self.pfam_description = pfam_info['description']

    def is_autofill_done(self):
        """
        the domain is considered filled once its Pfam identifier is a
        non-empty value
        """
        if self.pfam_id is None:
            return False
        return len(self.pfam_id) > 0

    @property
    def name(self):
        """
        the Pfam identifier of the domain
        """
        return self.pfam_id

    def __str__(self):
        return '{} ({}-{})'.format(
            self.pfam_acc, self.pfam_id, self.pfam_description)
312

Hervé  MENAGER's avatar
Hervé MENAGER committed
313

314
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and (optionally) one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey(
        'Domain',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex')

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # short name of the protein, then the protein and domain keys
        return f'{self.protein.short_name} {self.protein_id}-{self.domain_id}'

    def name(self):
        """
        the short name of the protein of this complex
        """
        protein = self.protein
        return protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
331

332

333
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name=_('ppp_copy_nb_per_p'),
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
343
344


345
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role;
    it adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = 'partner complexes'
Hervé  MENAGER's avatar
Hervé MENAGER committed
352

Hervé  MENAGER's avatar
Hervé MENAGER committed
353

354
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(
        verbose_name='Description',
        max_length=300,
    )

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return f'{self.code} ({self.description})'

367
368

class Disease(models.Model):
    """
    Disease: a name and an optional identifier
    """
    name = models.CharField(verbose_name='Disease', max_length=256)
    identifier = models.CharField(
        verbose_name='Identifier',
        max_length=32,
        null=True,
        blank=True,
    )

    def __str__(self):
        return '{} ({})'.format(self.name, self.identifier)
374

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
375

Hervé  MENAGER's avatar
Hervé MENAGER committed
376
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by a unique name
    """
    name = models.CharField(
        verbose_name='Name',
        max_length=30,
        unique=True,
    )

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
387

Hervé  MENAGER's avatar
Hervé MENAGER committed
388

389
class Ppi(AutoFillableModel):
    """
    PPI

    The ``name`` field is computed by autofill() from the short names of
    the proteins of the complexes attached to the PPI.
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease, blank=True)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True,
                               blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def get_absolute_url(self):
        return reverse('ppi-view', kwargs={'ppi_pk': self.pk})

    def is_autofill_done(self):
        """
        the PPI is considered filled unless its name is the empty string
        """
        # NOTE(review): a name that is None (the field is nullable) also
        # counts as filled here -- confirm this is intended before
        # changing it, since autofill() queries the related complexes
        return self.name != ""

    def autofill(self):
        """
        compute the name of the PPI from its complexes
        """
        # name is denormalized and stored in the database to reduce SQL
        # queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(
            ppi=self,
            complex__in=ProteinDomainBoundComplex.objects.all()
        )

    def compute_name_from_protein_names(self):
        """
        build the name of the PPI from the short names of its proteins

        :return: the comma separated names of the bound proteins,
            followed, when there are any, by ' / ' and the comma separated
            names of the other (partner) proteins
        """
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.all()
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes()
        }
        partner_protein_names = all_protein_names - bound_protein_names
        # sets have no defined iteration order: sort the names so that the
        # same complexes always produce the same stored name
        name = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        if partner_str != '':
            name += ' / ' + partner_str
        return name
440

Hervé  MENAGER's avatar
Hervé MENAGER committed
441

Hervé  MENAGER's avatar
Hervé MENAGER committed
442
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it
    involves
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(
        ProteinDomainComplex,
        on_delete=models.CASCADE,
    )
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return f'PPI {self.ppi}, Complex {self.complex} ({self.cc_nb})'

459
class ValidatedCompoundsManager(models.Manager):
    """
    ValidatedCompoundsManager removes from the results of the database
    query the compounds reachable through a contribution that is not
    validated, keeping only the compounds from validated contributions
    (or not coming from contributions)
    """

    def get_queryset(self):
        every_compound = super().get_queryset()
        return every_compound.exclude(
            compoundaction__ppi__contribution__validated=False
        )
467

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
468

469
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
470
471
472
    """
    Chemical compound
    """
473
474
475
476

    objects = models.Manager()
    validated = ValidatedCompoundsManager()

477
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
478
        verbose_name='Canonical Smiles',
479
480
        unique=True,
    )
481
    is_macrocycle = models.BooleanField(
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
482
483
        verbose_name=_('is_macrocycle_verbose_name'),
        help_text=_('is_macrocycle_help_text'),
484
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
485
    aromatic_ratio = models.DecimalField(
486
487
488
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
489
490
        blank=True,
        null=True,
491
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
492
    balaban_index = models.DecimalField(
493
494
495
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
496
497
        blank=True,
        null=True,
498
499
500
501
502
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
503
504
        blank=True,
        null=True,
505
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
506
    gc_molar_refractivity = models.DecimalField(
507
508
509
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
510
511
        blank=True,
        null=True,
512
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
513
    log_d = models.DecimalField(
514
515
516
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
517
518
        blank=True,
        null=True,
519
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
520
    a_log_p = models.DecimalField(
521
522
523
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
524
525
        blank=True,
        null=True,
526
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
527
    mean_atom_vol_vdw = models.DecimalField(
528
529
530
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
531
532
        blank=True,
        null=True,
533
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
534
    molecular_weight = models.DecimalField(
535
536
537
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
538
539
        blank=True,
        null=True,
540
541
542
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
543
544
        blank=True,
        null=True,
545
546
547
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
548
549
        blank=True,
        null=True,
550
551
552
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
553
554
        blank=True,
        null=True,
555
556
557
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
558
559
        blank=True,
        null=True,
560
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
561
    nb_aromatic_sssr = models.IntegerField(
562
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
563
564
        blank=True,
        null=True,
565
566
567
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
568
569
        blank=True,
        null=True,
570
571
572
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
573
574
        blank=True,
        null=True,
575
576
577
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
578
579
        blank=True,
        null=True,
580
581
582
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
583
584
        blank=True,
        null=True,
585
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
586
    nb_bonds_non_h = models.IntegerField(
587
        verbose_name='Number of bonds not involving a hydrogen',
588
589
        blank=True,
        null=True,
590
591
592
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
593
594
        blank=True,
        null=True,
595
596
597
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
598
599
        blank=True,
        null=True,
600
601
602
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
603
604
        blank=True,
        null=True,
605
606
607
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
608
609
        blank=True,
        null=True,
610
611
612
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
613
614
        blank=True,
        null=True,
615
616
617
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
618
619
        blank=True,
        null=True,
620
621
622
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
623
624
        blank=True,
        null=True,
625
626
627
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
628
629
        blank=True,
        null=True,
630
631
632
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
633
634
        blank=True,
        null=True,
635
636
637
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
638
639
        blank=True,
        null=True,
640
641
642
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
643
644
        blank=True,
        null=True,
645
646
647
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
648
649
        blank=True,
        null=True,
650
651
652
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
653
654
        blank=True,
        null=True,
655
656
657
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
658
659
        blank=True,
        null=True,
660
661
662
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
663
664
        blank=True,
        null=True,
665
666
667
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
668
669
        blank=True,
        null=True,
670
671
672
    )
    inchi = models.TextField(
        verbose_name='InChi',
673
674
        blank=True,
        null=True,
675
676
677
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
678
679
        blank=True,
        null=True,
680
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
681
    randic_index = models.DecimalField(
682
683
684
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
685
686
        blank=True,
        null=True,
687
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
688
    rdf070m = models.DecimalField(
689
690
691
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
692
693
        blank=True,
        null=True,
694
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
695
    rotatable_bond_fraction = models.DecimalField(
696
697
698
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
699
700
        blank=True,
        null=True,
701
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
702
    sum_atom_polar = models.DecimalField(
703
704
705
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
706
707
        blank=True,
        null=True,
708
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
709
    sum_atom_vol_vdw = models.DecimalField(
710
711
712
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
713
714
        blank=True,
        null=True,
715
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
716
    tpsa = models.DecimalField(
717
718
719
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
720
721
        blank=True,
        null=True,
722
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
723
    ui = models.DecimalField(
724
725
726
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
727
728
        blank=True,
        null=True,
729
730
731
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
732
733
        blank=True,
        null=True,
734
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
735
    common_name = models.CharField(
736
737
738
739
740
        verbose_name='Common name',
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
741
    pubchem_id = models.CharField(
742
743
744
745
746
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
747
    chemspider_id = models.CharField(
748
749
750
751
752
753
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
754
    chembl_id = models.CharField(
755
756
757
758
759
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
760
    iupac_name = models.CharField(
761
762
763
764
765
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
766
    ligand_id = models.CharField('PDB Ligand ID', max_length=3, blank=True, null=True)
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
    # Per-compound summary values. All of them are nullable.
    # NOTE(review): these look like precomputed (denormalised) values derived
    # from related records; the code that fills them is not visible here -
    # confirm before relying on their freshness.
    pubs = models.IntegerField(
        verbose_name='Number of publications', null=True, blank=True
    )    
    best_activity = models.DecimalField(
        'Best activity', max_digits=12, decimal_places=10, null=True, blank=True)
    best_activity_ppi_family_name = models.CharField('Best activity PPI family name', max_length=30, null=True, blank=True)
    le = models.FloatField(
        verbose_name='Ligand efficiency', null=True, blank=True
    )
    lle = models.FloatField(
        verbose_name='Lipophilic efficiency', null=True, blank=True
    )
    # Lipinsky: one flag per criterion, then a score and an overall flag
    lipinsky_mw = models.BooleanField('MW ok for Lipinsky', null=True, blank=True)
    lipinsky_hba = models.BooleanField('Hydrogen bond acceptors ok for Lipinsky', null=True, blank=True)
    lipinsky_hbd = models.BooleanField('Hydrogen bond donors ok for Lipinsky', null=True, blank=True)
    lipinsky_a_log_p = models.BooleanField('A log P ok for Lipinsky', null=True, blank=True)
    lipinsky_score = models.IntegerField(
        verbose_name='Lipinsky score', null=True, blank=True
    )
    lipinsky = models.BooleanField('Lipinsky ok', null=True, blank=True)
    # Veber: HBA+HBD sum, one flag per criterion, then an overall flag
    hba_hbd = models.IntegerField(
        verbose_name='Sum of Hydrogen bond acceptors and donors', null=True, blank=True
    )
    veber_hba_hbd = models.BooleanField('HBA+HBD ok for Veber', null=True, blank=True)
    veber_tpsa = models.BooleanField('TPSA ok for Veber', null=True, blank=True)
    veber_rb = models.BooleanField('Rotatable bonds ok for Veber', null=True, blank=True)
    veber = models.BooleanField('Veber ok', null=True, blank=True)
    # Pfizer: one flag per criterion, then an overall flag
    pfizer_a_log_p = models.BooleanField('A log P ok for Pfizer', null=True, blank=True)
    pfizer_tpsa = models.BooleanField('TPSA ok for Pfizer', null=True, blank=True)
    pfizer = models.BooleanField('Pfizer ok', null=True, blank=True)
    # Availability of a PDB ligand and roles of the compound
    pdb_ligand_av = models.BooleanField('PDB ligand available', null=True, blank=True)
    inhibition_role = models.BooleanField('Inhibition role', null=True, blank=True)
    binding_role = models.BooleanField('Binding role', null=True, blank=True)
    stabilisation_role = models.BooleanField('Stabilisation role', null=True, blank=True)
    # Which kinds of tests have been performed, and how many are available
    celltest_av = models.BooleanField('Cellular tests performed', null=True, blank=True)
    inhitest_av = models.BooleanField('Inhibition tests performed', null=True, blank=True)
    stabtest_av = models.BooleanField('Stabilisation tests performed', null=True, blank=True)
    bindtest_av = models.BooleanField('Binding tests performed', null=True, blank=True)
    pktest_av = models.BooleanField('Pharmacokinetic tests performed', null=True, blank=True)
    cytoxtest_av = models.BooleanField('Cytotoxicity tests performed', null=True, blank=True)
    insilico_av = models.BooleanField('In silico tests performed', null=True, blank=True)
    tests_av = models.IntegerField(verbose_name='Number of tests available', null=True, blank=True)

810

811
    class Meta:
        # default ordering of querysets: by primary key
        ordering = ['id']
        # one single-column index per listed field, in this order
        indexes = [
            models.Index(fields=[column])
            for column in (
                'molecular_weight',
                'a_log_p',
                'nb_donor_h',
                'nb_acceptor_h',
                'tpsa',
                'nb_rotatable_bonds',
                'nb_aromatic_sssr',
                'nb_chiral_centers',
                'fsp3',
                'pubs',
                'best_activity',
                'le',
                'lle',
            )
        ]
        # Settings below are disabled and kept for reference only.
        #default_manager_name = 'ippidb.models.ValidatedCompoundsManager'
        # indexes = [
        #     models.Index(fields=['lipinsky']),
        #     models.Index(fields=['veber']),
        #     models.Index(fields=['pfizer']),
        #     models.Index(fields=['pdb_ligand_av']),
        #     models.Index(fields=['inhibition_role']),
        #     models.Index(fields=['binding_role']),
        #     models.Index(fields=['stabilisation_role']),
        #     models.Index(fields=['binding_role']),
        # ]
839

840
841
842
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and the 15 highest
        Tanimoto values are stored as DrugbankCompoundTanimoto rows in one
        bulk insert.
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound by primary key - confirm. autofill()
        # calls this method, so check this cannot re-enter save()/autofill().
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        # only the two needed columns are fetched, no model instances are built
        smiles_dict = dict(DrugBankCompound.objects.values_list('id', 'canonical_smiles'))
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 DrugBank compounds with the highest Tanimoto value
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them; the foreign key is set through its id so that no
        # additional query is issued per retained DrugBank compound
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
858
859
860
    @property
    def biblio_refs(self):
        """
        RefCompoundBiblio entries whose compound is this compound
        """
        references = RefCompoundBiblio.objects.filter(compound=self)
        return references
864

865
866
867
868
869
870
871
872
873
874
875
876
877
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the bound complexes of the
        PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # invoked before the bound complexes are read; its return value
            # is not used here
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

878
879
    @property
    def best_pXC50_activity(self):
        """
        maximum of the ``activity`` values of the activity results of this
        compound, computed with a database aggregate
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
881
882
883

    @property
    def best_pXC50_compound_activity_result(self):
        """
        activity result of this compound whose activity equals the stored
        ``best_activity`` value

        Returns None when ``best_activity`` is not set, and also when no
        activity result carries exactly that value.
        """
        best_pXC50_activity = self.best_activity
        if best_pXC50_activity is None:
            return None
        # first() yields None on an empty queryset, whereas indexing with [0]
        # raises IndexError when the stored value matches no result row
        return self.compoundactivityresult_set.filter(activity=best_pXC50_activity).first()
888

889
890
891
892
893
894
895
896
897
898
899
900
901
902
    @property
    def bioch_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the test type 'BIOCH'
        """
        results = self.compoundactivityresult_set.all()
        biochemical = results.filter(test_activity_description__test_type='BIOCH')
        return biochemical.count()

    @property
    def cell_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the test type 'CELL'
        """
        results = self.compoundactivityresult_set.all()
        cellular = results.filter(test_activity_description__test_type='CELL')
        return cellular.count()

903
904
905
906
907
908
    @property
    def families(self):
        """
        list of the distinct PPI families of the PPIs targeted by the
        compound actions of this compound (no particular order)
        """
        distinct_families = {
            action.ppi.family for action in self.compoundaction_set.all()
        }
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
909

910
911
912
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugBank similarity records of this compound, ordered by decreasing
        Tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
913

914
    @property
    def is_validated(self):
        """
        True if at least one compound action of this compound targets a PPI
        that has no contribution or whose contribution is validated
        """
        without_contribution = Q(ppi__contribution__isnull=True)
        validated_contribution = Q(ppi__contribution__validated=True)
        matching_actions = self.compoundaction_set.filter(
            without_contribution | validated_contribution
        )
        return matching_actions.exists()
917

918
919
920
    def autofill(self):
        """
        fill the InChi and InChiKey from the canonical SMILES, then compute
        the similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        # both identifiers are derived from the same canonical SMILES
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        self.compute_drugbank_compound_similarity()
923

924
925
926
    def __str__(self):
        """
        label of the compound, built from its id
        """
        label = 'Compound #{}'
        return label.format(self.id)

927
928
929
    def get_absolute_url(self):
        """
        URL obtained by reversing the 'compound_card' URL name with the
        primary key of this compound
        """
        url_kwargs = {'pk': self.pk}
        return reverse('compound_card', kwargs=url_kwargs)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
930
931
932
933
934
935
    def clean(self):
        """
        validate that a non-empty common name is not already used by another
        compound

        Raises ValidationError, attached to the ``common_name`` field, when
        another compound already has the same common name.
        """
        # a missing or empty common name is never treated as a duplicate
        if not self.common_name:
            return
        # the compound being validated is excluded so that re-validating an
        # existing compound does not conflict with itself
        same_name = Compound.objects.filter(common_name=self.common_name).exclude(pk=self.pk)
        if same_name.exists():
            # add_error() is a form method that model instances do not have;
            # a model's clean() reports a field error by raising a
            # ValidationError keyed by field name
            raise ValidationError({"common_name": "A compound with this name already exists"})

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
936

937
938
939
class CompoundTanimoto(models.Model):
    canonical_smiles = models.TextField(
        'Canonical Smile')
940
    fingerprint = models.TextField('Fingerprint')
941
942
943
944
945
946
    compound = models.ForeignKey(Compound, models.CASCADE)
    tanimoto = models.DecimalField(
        'Tanimoto value', max_digits=5, decimal_places=4