models.py 55.9 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
from django.db.models import FloatField, IntegerField, BooleanField
15
from django.db.models import Max, Count, F, Q, Case, When, Subquery, OuterRef
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, \
    get_taxonomy_info, get_go_info, get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be populated from an
    external source.

    Subclasses implement autofill() to fetch and assign the data.
    save() triggers autofill() either when the caller passes
    ``autofill=True`` or when is_autofill_done() reports that the instance
    has not been filled yet.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running the autofill hooks when required.

        The optional ``autofill`` keyword is consumed here and is never
        forwarded to Django's own save().
        """
        not_filled_yet = not self.is_autofill_done()
        explicitly_requested = kwargs.pop('autofill', None) is True
        run_hooks = explicitly_requested or not_filled_yet
        if run_hooks:
            self.autofill()
        super().save(*args, **kwargs)
        if run_hooks:
            # the instance now has a primary key, relations can be set
            self.autofill_post_save()

    def autofill(self):
        """
        Fill the model fields; must be provided by subclasses.
        """
        raise NotImplementedError()

    def autofill_post_save(self):
        """
        Hook called once the save is done, useful for setting m2m
        relations. Does nothing by default.

        :return:
        """

    def is_autofill_done(self):
        """
        Tell whether the instance is already filled; True by default, so
        autofill() only runs on explicit request.
        """
        return True

61
62

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI'),
    )
    # One compiled pattern per source code, used by validate_source_id().
    # Raw strings keep the backslash escapes intact; the dot after "10" is
    # escaped so that it only matches a literal "." (DOI prefix "10.").
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES,
        default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.TextField('Title')
    journal_name = models.TextField('Journal name', null=True, blank=True)
    authors_list = models.TextField('Authors list')
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI)

        :raises NotImplementedError: if the source code is not one of
            'PM', 'PT' or 'DO'
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The record is considered filled as soon as it has a title.
        A missing (None) title counts as "not filled" instead of raising.
        """
        return bool(self.title)

    def clean(self):
        """
        Model-level validation: check id_source against its source pattern.
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Tell whether get_external_url() can build a link for this source.
        """
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        Return the PubMed or doi.org URL of the reference, or None when
        the source has no external URL (see has_external_url()).
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check that id_source matches the pattern registered for source.

        :param id_source: the identifier to validate
        :param source: one of the keys of id_source_validators
        :return: True when the identifier is valid
        :raises ValidationError: keyed on ``id_source`` when it does not match
        :raises KeyError: when source has no registered pattern
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            # The msgid must be a constant string so that it can be found
            # in the translation catalog; the pattern is interpolated
            # after the lookup.
            raise ValidationError(
                dict(
                    id_source=_(
                        "Must match pattern %(pattern)s"
                        " for this selected source"
                    ) % {'pattern': id_source_validator.pattern}
                )
            )
        return True

    def data_and_study(self):
        """
        Return a comma-separated list of the title-cased verbose names of
        the data/study flags that are set on this reference.
        """
        flag_names = (
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        )
        return ", ".join(
            self._meta.get_field(flag).verbose_name.title()
            for flag in flag_names
            if getattr(self, flag, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)

    def get_absolute_url(self):
        return reverse('biblio-view', kwargs={'biblio_pk': self.pk})

Hervé  MENAGER's avatar
Hervé MENAGER committed
166

167
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy identifier together with the matching
    human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        Set the organism name from the taxonomy web service lookup
        (NCBI Entrez)
        """
        taxonomy_info = get_taxonomy_info(self.taxonomy_id)
        self.name = taxonomy_info['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
189

Hervé  MENAGER's avatar
Hervé MENAGER committed
190

191
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function together with the matching
    human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(
        verbose_name='description',
        max_length=500,
    )

    def autofill(self):
        """
        Set the description from the GO web service lookup
        (EBI OLS)
        """
        go_info = get_go_info(self.go_id)
        self.description = go_info['label']

    def is_autofill_done(self):
        """
        Filled as soon as a non-empty description is present.
        """
        return bool(self.description)

    @property
    def name(self):
        """
        GO identifier followed by the description, space-separated.
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
218

219
class Protein(AutoFillableModel):
    """
    Protein entry identified by its Uniprot ID, with the names, organism,
    molecular functions and domains attached to it
    """
    uniprot_id = models.CharField(
        verbose_name='Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(verbose_name='Gene name', max_length=30)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey(to='Taxonomy', on_delete=models.CASCADE)
    molecular_functions = models.ManyToManyField(to=MolecularFunction)
    domains = models.ManyToManyField(to='Domain')

    @transaction.atomic
    def autofill(self):
        """
        Fill the scalar fields and the organism from the Uniprot lookup,
        creating the Taxonomy record when it does not exist yet.

        The fetched data is kept on the instance so that
        autofill_post_save() can set the many-to-many relations.
        """
        uniprot = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = uniprot['recommended_name']

        # default to the first listed name, prefer the primary one if any
        candidates = uniprot['gene_names']
        self.gene_name = candidates[0]['name']
        primary = next(
            (entry for entry in candidates if entry["type"] == "primary"),
            None,
        )
        if primary is not None:
            self.gene_name = primary["name"]

        self.entry_name = uniprot['entry_name']
        self.short_name = uniprot['short_name']

        try:
            organism = Taxonomy.objects.get(taxonomy_id=uniprot['organism'])
        except Taxonomy.DoesNotExist:
            organism = Taxonomy(taxonomy_id=uniprot['organism'])
            organism.save(autofill=True)
        self.organism = organism
        self.__info = uniprot

    def autofill_post_save(self):
        """
        Attach the molecular functions and domains listed in the data
        stored by autofill(), creating the related records when needed.
        """
        uniprot = self.__info
        for go_id in uniprot['molecular_functions']:
            function = MolecularFunction.objects.get_or_create(go_id=go_id)[0]
            self.molecular_functions.add(function)

        for pfam_acc in uniprot['domains']:
            pfam_domain = Domain.objects.get_or_create(pfam_acc=pfam_acc)[0]
            self.domains.add(pfam_domain)

    def is_autofill_done(self):
        """
        Filled as soon as the gene name is not empty.
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return f'{self.uniprot_id} ({self.recommended_name_long})'

Hervé  MENAGER's avatar
Hervé MENAGER committed
280

281
class Domain(AutoFillableModel):
    """
    Protein domain, identified by its PFAM accession
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(
        verbose_name='Pfam Description', max_length=100)
    # TODO: what is this field? check database contents
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    def autofill(self):
        """
        Set the family identifier and description from the PFAM lookup
        """
        pfam = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam['id']
        self.pfam_description = pfam['description']

    def is_autofill_done(self):
        """
        Filled as soon as a non-empty PFAM identifier is present.
        """
        return bool(self.pfam_id)

    @property
    def name(self):
        """
        The PFAM family identifier.
        """
        return self.pfam_id

    def __str__(self):
        return '{} ({}-{})'.format(
            self.pfam_acc, self.pfam_id, self.pfam_description)
312

Hervé  MENAGER's avatar
Hervé MENAGER committed
313

314
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and (optionally) one of its domains
    """
    protein = models.ForeignKey(to='Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey(
        to='Domain', on_delete=models.CASCADE, null=True, blank=True)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex')

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # built from the raw foreign keys, so no related object is loaded
        return f'{self.protein_id}-{self.domain_id}'

    def name(self):
        """
        Short name of the associated protein.
        """
        associated_protein = self.protein
        return associated_protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
331

332

333
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    ProteinDomainComplex playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name=_('ppp_copy_nb_per_p'),
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
343
344


345
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    ProteinDomainComplex playing the "partner complex" role;
    adds no field of its own
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
352

Hervé  MENAGER's avatar
Hervé MENAGER committed
353

354
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return f'{self.code} ({self.description})'

367
368

class Disease(models.Model):
    """
    Disease, identified by a unique free-text name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
376

Hervé  MENAGER's avatar
Hervé MENAGER committed
377
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by a unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
388

Hervé  MENAGER's avatar
Hervé MENAGER committed
389

390
class Ppi(AutoFillableModel):
    """
    Protein-protein interaction (PPI)
    """
    pdb_id = models.CharField(
        verbose_name='PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        verbose_name='Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(to=Symmetry, on_delete=models.CASCADE)
    diseases = models.ManyToManyField(to=Disease, blank=True)
    family = models.ForeignKey(
        to=PpiFamily, on_delete=models.CASCADE, null=True, blank=True)
    name = models.TextField(verbose_name='PPI name', null=True, blank=True)

    def __str__(self):
        return f'PPI #{self.id} on {self.name}'

    def get_absolute_url(self):
        return reverse('ppi-view', kwargs={'ppi_pk': self.pk})

    def is_autofill_done(self):
        """
        Only an empty-string name marks the PPI as not filled yet.
        """
        # NOTE(review): a None name compares unequal to "" and is therefore
        # reported as "done" - confirm this is intended for new instances.
        return self.name != ""

    def autofill(self):
        """
        Store the computed name on the instance.
        """
        # name is denormalized and stored in the database to reduce SQL
        # queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return the PpiComplex rows of this ppi whose complex is a
        bound complex
        """
        bound_complexes = ProteinDomainBoundComplex.objects.all()
        return PpiComplex.objects.filter(
            ppi=self, complex__in=bound_complexes)

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from protein short names: the bound proteins,
        comma-separated, followed by ' / ' and the other (partner)
        proteins when there are any.
        """
        every_name = {
            link.complex.protein.short_name
            for link in self.ppicomplex_set.all()
        }
        bound_names = {
            link.complex.protein.short_name
            for link in self.get_ppi_bound_complexes()
        }
        bound_part = ','.join(bound_names)
        partner_part = ','.join(every_name - bound_names)
        if partner_part == '':
            return bound_part
        return bound_part + ' / ' + partner_part
441

Hervé  MENAGER's avatar
Hervé MENAGER committed
442

Hervé  MENAGER's avatar
Hervé MENAGER committed
443
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(to=Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(
        to=ProteinDomainComplex, on_delete=models.CASCADE)
    cc_nb = models.IntegerField(
        verbose_name=_('cc_nb_verbose_name'),
        default=1,
    )

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return f'PPI {self.ppi}, Complex {self.complex} ({self.cc_nb})'

Hervé  MENAGER's avatar
Hervé MENAGER committed
460

461
class CompoundManager(models.Manager):
    """
    Manager for compounds; get_queryset_old() decorates the query results
    with a number of annotations used for filters and the compound card
    """

    # def get_queryset(self):
    #     return super().get_queryset().exclude(compoundaction__ppi__contribution__validated=False)

    def get_queryset_old(self):
        """
        Return the base queryset with every annotation applied, one
        annotate() call per annotation, in declaration order (later
        annotations reference earlier ones).
        """

        def flag(*conditions, **lookups):
            # boolean annotation: True when the condition holds, else False
            return Case(
                When(*conditions, then=True, **lookups),
                default=False,
                output_field=BooleanField(),
            )

        def any_match(**lookups):
            # boolean annotation: True when at least one joined row matches
            return Cast(
                Max(
                    Case(
                        When(then=1, **lookups),
                        default=0,
                        output_field=IntegerField(),
                    )
                ),
                BooleanField(),
            )

        activity = 'compoundactivityresult__activity'
        modulation = 'compoundactivityresult__modulation_type'
        test_desc = 'compoundactivityresult__test_activity_description__'
        biblio = 'refcompoundbiblio__bibliography__'

        annotations = [
            # number of publications
            ('pubs', Count('refcompoundbiblio', distinct=True)),
            # best activity
            ('best_activity', Max(activity)),
            # LE
            ('le', Cast(
                1.37 * Max(activity) / F('nb_atom_non_h'), FloatField())),
            # LLE
            ('lle', Cast(Max(activity) - F('a_log_p'), FloatField())),
            # Lipinsky MW (<=500)
            ('lipinsky_mw', flag(molecular_weight__lte=500)),
            # Lipinsky hba (<=10)
            ('lipinsky_hba', flag(nb_acceptor_h__lte=10)),
            # Lipinsky hbd (<=5)
            ('lipinsky_hbd', flag(nb_donor_h__lte=5)),
            # Lipinsky a_log_p (<=5)
            ('lipinsky_a_log_p', flag(a_log_p__lte=5)),
            # Lipinsky global: number of satisfied rules, then >= 3
            ('lipinsky_score',
             Cast(F('lipinsky_mw'), IntegerField())
             + Cast(F('lipinsky_hba'), IntegerField())
             + Cast(F('lipinsky_hbd'), IntegerField())
             + Cast(F('lipinsky_a_log_p'), IntegerField())),
            ('lipinsky', flag(lipinsky_score__gte=3)),
            # Veber hba_hbd (<=12)
            ('hba_hbd', F('nb_acceptor_h') + F('nb_donor_h')),
            ('veber_hba_hbd', flag(hba_hbd__lte=12)),
            # Veber TPSA (<=140)
            ('veber_tpsa', flag(tpsa__lte=140)),
            # Veber Rotatable Bonds (<=10)
            ('veber_rb', flag(nb_rotatable_bonds__lte=10)),
            # Veber global (Rotatable bonds and (hba_hbd or tpsa))
            ('veber', flag(
                Q(Q(nb_rotatable_bonds__lte=10)
                  & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))))),
            # Pfizer AlogP (<=3)
            ('pfizer_a_log_p', flag(a_log_p__lte=3)),
            # Pfizer TPSA (>=75)
            ('pfizer_tpsa', flag(tpsa__gte=75)),
            # Pfizer global (AlogP and TPSA)
            ('pfizer', flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))),
            # PDB ligand available
            ('pdb_ligand_av', any_match(ligand_id__isnull=False)),
            # inhibition role
            ('inhibition_role', flag(**{modulation: 'I'})),
            # binding role
            ('binding_role', flag(**{modulation: 'B'})),
            # stabilisation role
            ('stabilisation_role', flag(**{modulation: 'S'})),
            # cellular tests performed
            ('celltest_av', any_match(**{test_desc + 'test_type': 'CELL'})),
            # inhibition tests performed
            ('inhitest_av',
             any_match(**{test_desc + 'test_modulation_type': 'I'})),
            # stabilisation tests performed
            ('stabtest_av',
             any_match(**{test_desc + 'test_modulation_type': 'S'})),
            # binding tests performed
            ('bindtest_av',
             any_match(**{test_desc + 'test_modulation_type': 'B'})),
            # pharmacokinetic tests performed
            ('pktest_av', any_match(**{biblio + 'pharmacokinetic': True})),
            # cytotoxicity tests performed
            ('cytoxtest_av', any_match(**{biblio + 'cytotox': True})),
            # in silico study performed
            ('insilico_av', any_match(**{biblio + 'in_silico': True})),
            # number of tests available
            ('tests_av', Count('compoundactivityresult', distinct=True)),
        ]

        qs = super().get_queryset()
        for alias, expression in annotations:
            qs = qs.annotate(**{alias: expression})
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
534

535
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
536
537
538
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
539
    # Manager instance exposed as ``Compound.objects``
    objects = CompoundManager()

    # Canonical SMILES string; the column carries a uniqueness constraint
    canonical_smile = models.TextField(unique=True, verbose_name='Canonical Smiles')
    # Label and help text are translation keys resolved lazily through ``_``
    is_macrocycle = models.BooleanField(
        verbose_name=_('is_macrocycle_verbose_name'),
        help_text=_('is_macrocycle_help_text'),
    )

    # Optional fixed-precision values: every column below is nullable and may
    # be left blank; max_digits / decimal_places bound the storable range
    aromatic_ratio = models.DecimalField(
        verbose_name='Aromatic ratio', max_digits=3, decimal_places=2, null=True, blank=True,
    )
    balaban_index = models.DecimalField(
        verbose_name='Balaban index', max_digits=3, decimal_places=2, null=True, blank=True,
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3', max_digits=3, decimal_places=2, null=True, blank=True,
    )
    gc_molar_refractivity = models.DecimalField(
        verbose_name='GC Molar Refractivity', max_digits=5, decimal_places=2, null=True, blank=True,
    )
    log_d = models.DecimalField(
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4, decimal_places=2, null=True, blank=True,
    )
    a_log_p = models.DecimalField(
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4, decimal_places=2, null=True, blank=True,
    )
    mean_atom_vol_vdw = models.DecimalField(
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4, decimal_places=2, null=True, blank=True,
    )
    molecular_weight = models.DecimalField(
        verbose_name='Molecular weight', max_digits=6, decimal_places=2, null=True, blank=True,
    )
    # Optional integer counts (``nb_*``): every column below is nullable and
    # may be left blank
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors', null=True, blank=True,
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines', null=True, blank=True,
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds', null=True, blank=True,
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers', null=True, blank=True,
    )
    nb_aromatic_sssr = models.IntegerField(
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
        null=True, blank=True,
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms', null=True, blank=True,
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms', null=True, blank=True,
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings', null=True, blank=True,
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds', null=True, blank=True,
    )
    nb_bonds_non_h = models.IntegerField(
        verbose_name='Number of bonds not involving a hydrogen', null=True, blank=True,
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms', null=True, blank=True,
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms', null=True, blank=True,
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers', null=True, blank=True,
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits', null=True, blank=True,
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms', null=True, blank=True,
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms', null=True, blank=True,
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms', null=True, blank=True,
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors', null=True, blank=True,
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds', null=True, blank=True,
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms', null=True, blank=True,
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms', null=True, blank=True,
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds', null=True, blank=True,
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms', null=True, blank=True,
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms', null=True, blank=True,
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings', null=True, blank=True,
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds', null=True, blank=True,
    )
    # Optional InChI / InChIKey text columns
    inchi = models.TextField(verbose_name='InChi', null=True, blank=True)
    inchikey = models.TextField(verbose_name='InChiKey', null=True, blank=True)

    # Optional fixed-precision values, followed by the integer Wiener index
    randic_index = models.DecimalField(
        verbose_name='Randic index', max_digits=4, decimal_places=2, null=True, blank=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5, decimal_places=2, null=True, blank=True,
    )
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3, decimal_places=2, null=True, blank=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5, decimal_places=2, null=True, blank=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6, decimal_places=2, null=True, blank=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5, decimal_places=2, null=True, blank=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index', max_digits=4, decimal_places=2, null=True, blank=True,
    )
    wiener_index = models.IntegerField(verbose_name='Wiener index', null=True, blank=True)

    # Optional names and identifiers; chemspider_id is the only one declared
    # unique
    common_name = models.CharField(
        verbose_name='Common name', max_length=20, null=True, blank=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID', max_length=10, null=True, blank=True,
    )
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID', max_length=10, unique=True, null=True, blank=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID', max_length=30, null=True, blank=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name', max_length=255, null=True, blank=True,
    )
    ligand_id = models.CharField(
        verbose_name='PDB Ligand ID', max_length=3, null=True, blank=True,
    )
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
    # All columns below are nullable and may be left blank.
    # NOTE(review): many of them share a name with an annotation built by the
    # manager queryset (e.g. celltest_av, tests_av); presumably they store
    # those values on the row -- confirm against the code that fills them.

    # Publication count and activity summary
    pubs = models.IntegerField(verbose_name='Number of publications', null=True, blank=True)
    best_activity = models.DecimalField(
        verbose_name='Best activity', max_digits=12, decimal_places=10, null=True, blank=True,
    )
    best_activity_ppi_family_name = models.CharField(
        verbose_name='Best activity PPI family name', max_length=30, null=True, blank=True,
    )
    le = models.FloatField(verbose_name='Ligand efficiency', null=True, blank=True)
    lle = models.FloatField(verbose_name='Lipophilic efficiency', null=True, blank=True)

    # Lipinsky flags and score
    lipinsky_mw = models.BooleanField(
        verbose_name='MW ok for Lipinsky', null=True, blank=True,
    )
    lipinsky_hba = models.BooleanField(
        verbose_name='Hydrogen bond acceptors ok for Lipinsky', null=True, blank=True,
    )
    lipinsky_hbd = models.BooleanField(
        verbose_name='Hydrogen bond donors ok for Lipinsky', null=True, blank=True,
    )
    lipinsky_a_log_p = models.BooleanField(
        verbose_name='A log P ok for Lipinsky', null=True, blank=True,
    )
    lipinsky_score = models.IntegerField(verbose_name='Lipinsky score', null=True, blank=True)
    lipinsky = models.BooleanField(verbose_name='Lipinsky ok', null=True, blank=True)

    # Veber flags
    hba_hbd = models.IntegerField(
        verbose_name='Sum of Hydrogen bond acceptors and donors', null=True, blank=True,
    )
    veber_hba_hbd = models.BooleanField(
        verbose_name='HBA+HBD ok for Veber', null=True, blank=True,
    )
    veber_tpsa = models.BooleanField(
        verbose_name='TPSA ok for Veber', null=True, blank=True,
    )
    veber_rb = models.BooleanField(
        verbose_name='Rotatable bonds ok for Veber', null=True, blank=True,
    )
    veber = models.BooleanField(verbose_name='Veber ok', null=True, blank=True)

    # Pfizer flags
    pfizer_a_log_p = models.BooleanField(
        verbose_name='A log P ok for Pfizer', null=True, blank=True,
    )
    pfizer_tpsa = models.BooleanField(
        verbose_name='TPSA ok for Pfizer', null=True, blank=True,
    )
    pfizer = models.BooleanField(verbose_name='Pfizer ok', null=True, blank=True)

    # PDB ligand availability and modulation roles
    pdb_ligand_av = models.BooleanField(
        verbose_name='PDB ligand available', null=True, blank=True,
    )
    inhibition_role = models.BooleanField(
        verbose_name='Inhibition role', null=True, blank=True,
    )
    binding_role = models.BooleanField(
        verbose_name='Binding role', null=True, blank=True,
    )
    stabilisation_role = models.BooleanField(
        verbose_name='Stabilisation role', null=True, blank=True,
    )

    # Which kinds of tests were performed, and how many tests are available
    celltest_av = models.BooleanField(
        verbose_name='Cellular tests performed', null=True, blank=True,
    )
    inhitest_av = models.BooleanField(
        verbose_name='Inhibition tests performed', null=True, blank=True,
    )
    stabtest_av = models.BooleanField(
        verbose_name='Stabilisation tests performed', null=True, blank=True,
    )
    bindtest_av = models.BooleanField(
        verbose_name='Binding tests performed', null=True, blank=True,
    )
    pktest_av = models.BooleanField(
        verbose_name='Pharmacokinetic tests performed', null=True, blank=True,
    )
    cytoxtest_av = models.BooleanField(
        verbose_name='Cytotoxicity tests performed', null=True, blank=True,
    )
    insilico_av = models.BooleanField(
        verbose_name='In silico tests performed', null=True, blank=True,
    )
    tests_av = models.IntegerField(
        verbose_name='Number of tests available', null=True, blank=True,
    )

873

874
    class Meta:
        # Default queryset ordering: by primary key
        ordering = ['id']
        # One single-column index per listed column, declared in this order.
        # NOTE: no index is declared on the boolean flag columns (lipinsky,
        # veber, pfizer, pdb_ligand_av, inhibition_role, binding_role,
        # stabilisation_role).
        indexes = [
            models.Index(fields=[column])
            for column in (
                'molecular_weight',
                'a_log_p',
                'nb_donor_h',
                'nb_acceptor_h',
                'tpsa',
                'nb_rotatable_bonds',
                'nb_aromatic_sssr',
                'nb_chiral_centers',
                'fsp3',
                'pubs',
                'best_activity',
                'le',
                'lle',
            )
        ]
901

902
903
904
    def compute_drugbank_compound_similarity(self):
        """ compute Tanimoto similarity to existing DrugBank compounds """
        self.save()
905
        # fingerprints to compute drugbank similarities are in settings module, default FP2
906
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
907
908
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
909
910
911
912
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        tanimoto_dict = dict(sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15])
        dbcts = []
        for id_, tanimoto in tanimoto_dict.items():
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
913
914
915
916
917
            dbcts.append(DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound=DrugBankCompound.objects.get(id=id_),
                tanimoto=tanimoto,
            ))