"""
Models used in iPPI-DB
"""

from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
15
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
29
30
31
32
33
34
35
36
class AutoFillableModel(models.Model):
    """
    Abstract base model whose fields can be populated from external sources.

    Subclasses implement autofill() to fetch and assign the field values.
    save() accepts an extra ``autofill`` keyword argument: autofill() runs
    before saving when that argument is True, or when is_autofill_done()
    reports that the instance has not been filled yet.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() first when required.

        The ``autofill`` keyword argument is consumed here and never
        forwarded to the parent save().
        """
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super().save(*args, **kwargs)

    def autofill(self):
        """
        Fill the model fields from an external source.

        Must be overridden by subclasses; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def is_autofill_done(self):
        """
        Tell whether the instance is already filled.

        The base implementation always answers True, so autofill() is only
        triggered by an explicit save(autofill=True) unless a subclass
        overrides this method.
        """
        return True

49
50

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)
    """
    # (stored code, human-readable label) pairs for the `source` field
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # compiled pattern that `id_source` must match, keyed by source code
    id_source_validators = dict(
        PM=re.compile("^[0-9]+$"),
        PT=re.compile("^.*$"),
        # raw string so that \d is a regex escape, and the dot after "10" is
        # escaped so that it only matches a literal "."
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI, depending on the source)

        Raises NotImplementedError when the source is not one of the
        handled codes.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled once it has a non-empty title
        """
        return len(self.title) > 0

    def clean(self):
        """
        Run the default model validation, then check that id_source matches
        the pattern expected for the selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Tell whether get_external_url() returns a URL for this source
        """
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        Return the PubMed or doi.org URL of the reference, or None for
        sources without an external URL
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the identifier matches; raises ValidationError
        (attached to the id_source field) otherwise. An unknown source code
        raises KeyError.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            raise ValidationError(
                dict(
                    # translate the constant message first, then interpolate:
                    # interpolating inside _() would produce a string that is
                    # never found in the translation catalog
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def data_and_study(self):
        """
        Return a comma-separated list of the title-cased verbose names of
        the data/study flags that are set on this reference
        """
        flag_names = (
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        )
        return ", ".join(
            self._meta.get_field(flag_name).verbose_name.title()
            for flag_name in flag_names
            if getattr(self, flag_name, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)

    def get_absolute_url(self):
        return reverse('biblio-view', kwargs={'biblio_pk': self.pk})

Hervé  MENAGER's avatar
Hervé MENAGER committed
151

152
class Taxonomy(AutoFillableModel):
    """
    Organism entry: an NCBI Taxonomy identifier together with the
    matching human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        'NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField('Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        Look the taxonomy ID up through get_taxonomy_info() and store the
        returned scientific name as the organism name
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
174

Hervé  MENAGER's avatar
Hervé MENAGER committed
175

176
class MolecularFunction(AutoFillableModel):
    """
    Molecular function: a Gene Ontology identifier together with the
    matching human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField('Gene Ontology ID', unique=True, max_length=10)
    description = models.CharField('description', max_length=500)

    def autofill(self):
        """
        Look the GO term up through get_go_info() and store its label as
        the description
        """
        go_term = get_go_info(self.go_id)
        self.description = go_term['label']

    def is_autofill_done(self):
        """
        The entry is filled once it has a non-empty description
        """
        return bool(self.description)

    @property
    def name(self):
        """
        GO identifier and description separated by a single space
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
203

204
class Protein(AutoFillableModel):
    """
    Protein described by its Uniprot identifier, with the names, organism,
    molecular functions and domains retrieved for it
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)',
        max_length=75,
    )
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        Fill the protein from get_uniprot_info(), creating the Taxonomy,
        MolecularFunction and Domain entries that do not exist yet.

        The instance is saved through the parent class in the middle of
        the process, before the molecular functions and domains are
        attached to it.
        """
        uniprot = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = uniprot['recommended_name']

        # start from the first listed gene name, then prefer the first one
        # flagged as primary when there is one
        candidates = uniprot['gene_names']
        self.gene_name = candidates[0]['name']
        self.gene_name = next(
            (candidate["name"] for candidate in candidates if candidate["type"] == "primary"),
            self.gene_name,
        )

        self.entry_name = uniprot['entry_name']
        self.short_name = uniprot['short_name']

        organism_id = uniprot['organism']
        try:
            organism = Taxonomy.objects.get(taxonomy_id=organism_id)
        except Taxonomy.DoesNotExist:
            # unknown organism: create it and let it fetch its own name
            organism = Taxonomy(taxonomy_id=organism_id)
            organism.save(autofill=True)
        self.organism = organism
        super().save()

        for go_id in uniprot['molecular_functions']:
            function, _created = MolecularFunction.objects.get_or_create(go_id=go_id)
            self.molecular_functions.add(function)

        for pfam_acc in uniprot['domains']:
            pfam_domain, _created = Domain.objects.get_or_create(pfam_acc=pfam_acc)
            self.domains.add(pfam_domain)

    def is_autofill_done(self):
        """
        The protein is filled once it has a non-empty gene name
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{0} ({1})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
262

263
class Domain(AutoFillableModel):
    """
    Protein domain, identified by its Pfam accession
    """
    pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField('Pfam Description', max_length=100)
    # TODO: what is this field? check database contents
    domain_family = models.CharField(
        'Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    def autofill(self):
        """
        Look the accession up through get_pfam_info() and store the family
        identifier and description it returns
        """
        pfam = get_pfam_info(self.pfam_acc)
        self.pfam_id, self.pfam_description = pfam['id'], pfam['description']

    def is_autofill_done(self):
        """
        The domain is filled once it has a non-empty Pfam family identifier
        """
        return bool(self.pfam_id)

    @property
    def name(self):
        """
        The Pfam family identifier
        """
        return self.pfam_id

    def __str__(self):
        return '{0} ({1}-{2})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)

Hervé  MENAGER's avatar
Hervé MENAGER committed
294

295
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and (optionally) one of its domains
    """
    protein = models.ForeignKey('Protein', models.CASCADE)
    domain = models.ForeignKey(
        'Domain',
        models.CASCADE,
        null=True,
        blank=True,
    )
    ppc_copy_nb = models.IntegerField('Number of copies of the protein in the complex')

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # built from the raw foreign key values, so no related object is loaded
        return '{0}-{1}'.format(self.protein_id, self.domain_id)

    def name(self):
        """
        Short name of the associated protein
        """
        protein = self.protein
        return protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
312

313

314
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    # the verbose name is a translation key resolved through ugettext_lazy
    ppp_copy_nb_per_p = models.IntegerField(_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
324
325


326
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role;
    adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
333

Hervé  MENAGER's avatar
Hervé MENAGER committed
334

335
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField('Symmetry code', max_length=2)
    description = models.CharField('Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{0} ({1})'.format(self.code, self.description)

348
349

class Disease(models.Model):
    """
    Disease, identified by a unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(
        'Disease',
        max_length=30,
        unique=True,
    )

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
357

Hervé  MENAGER's avatar
Hervé MENAGER committed
358
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by a unique name
    """
    name = models.CharField(
        'Name',
        max_length=30,
        unique=True,
    )

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
369

Hervé  MENAGER's avatar
Hervé MENAGER committed
370

371
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease, blank=True)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def get_absolute_url(self):
        return reverse('ppi-view', kwargs={'ppi_pk': self.pk})

    def is_autofill_done(self):
        """
        The PPI is considered filled unless its name is the empty string
        """
        # NOTE(review): `name` is nullable and None != "" is True, so a PPI
        # whose name is None also counts as filled; confirm this is intended
        return self.name != ""

    def autofill(self):
        """
        Recompute the stored name from the proteins of the PPI complexes
        """
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins.

        The name is the comma-separated short names of the bound proteins,
        followed by ' / ' and the comma-separated short names of the other
        (partner) proteins when there are any. Names are sorted inside each
        group so that the same PPI always yields the same name (iteration
        order of a set of strings is not stable between interpreter runs).
        """
        # select_related fetches complex and protein together with each
        # PpiComplex instead of issuing extra queries per complex
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        partner_protein_names = all_protein_names - bound_protein_names
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
414

Hervé  MENAGER's avatar
Hervé MENAGER committed
415

Hervé  MENAGER's avatar
Hervé MENAGER committed
416
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(Ppi, models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, models.CASCADE)
    # the verbose name is a translation key resolved through ugettext_lazy
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI {0}, Complex {1} ({2})'.format(self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
433

434
class CompoundManager(models.Manager):
    """
    CompoundManager adds automatically a number of annotations to the results
    of the database query, used for filters and compound card
    """

    @staticmethod
    def _flag(*conditions, **lookups):
        """
        Boolean expression: True when the given condition holds, else False
        """
        return Case(
            When(*conditions, then=True, **lookups),
            default=False,
            output_field=BooleanField(),
        )

    @staticmethod
    def _any_related(**lookups):
        """
        Boolean expression: True when at least one joined row matches the
        lookups (each row scores 1 or 0, the maximum is cast to a boolean)
        """
        return Cast(
            Max(Case(When(then=1, **lookups), default=0, output_field=IntegerField())),
            BooleanField(),
        )

    def get_queryset(self):
        """
        Return the default queryset with all the computed annotations added.

        Annotations are added one call at a time, in a fixed order, because
        some of them (lipinsky_score, lipinsky, veber_hba_hbd, veber) refer
        to annotations added before them.
        """
        flag = self._flag
        any_related = self._any_related
        activity = 'compoundactivityresult__activity'
        test_description = 'compoundactivityresult__test_activity_description__'
        biblio = 'refcompoundbiblio__bibliography__'

        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity
        qs = qs.annotate(best_activity=Max(activity))
        # with LE
        qs = qs.annotate(le=Cast(1.37 * Max(activity) / F('nb_atom_non_h'), FloatField()))
        # with LLE
        qs = qs.annotate(lle=Cast(Max(activity) - F('a_log_p'), FloatField()))

        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=flag(molecular_weight__lte=500))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=flag(nb_acceptor_h__lte=10))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=flag(nb_donor_h__lte=5))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=flag(a_log_p__lte=5))
        # Lipinsky global: number of satisfied criteria, at least 3 required
        qs = qs.annotate(
            lipinsky_score=Cast(F('lipinsky_mw'), IntegerField())
            + Cast(F('lipinsky_hba'), IntegerField())
            + Cast(F('lipinsky_hbd'), IntegerField())
            + Cast(F('lipinsky_a_log_p'), IntegerField())
        )
        qs = qs.annotate(lipinsky=flag(lipinsky_score__gte=3))

        # Veber hba_hbd (<=12)
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h') + F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=flag(hba_hbd__lte=12))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=flag(tpsa__lte=140))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=flag(nb_rotatable_bonds__lte=10))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa)), written on
        # the underlying values rather than on the three flags above
        qs = qs.annotate(
            veber=flag(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140)))
        )

        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=flag(a_log_p__lte=3))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=flag(tpsa__gte=75))
        # Pfizer global (AlogP and TPSA), written on the underlying values
        qs = qs.annotate(pfizer=flag(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))

        # PDB ligand available
        qs = qs.annotate(pdb_ligand_av=any_related(compoundaction__ligand_id__isnull=False))

        # inhibition role
        qs = qs.annotate(inhibition_role=flag(compoundactivityresult__modulation_type='I'))
        # binding role
        qs = qs.annotate(binding_role=flag(compoundactivityresult__modulation_type='B'))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=flag(compoundactivityresult__modulation_type='S'))

        # cellular tests performed
        qs = qs.annotate(celltest_av=any_related(**{test_description + 'test_type': 'CELL'}))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=any_related(**{test_description + 'test_modulation_type': 'I'}))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=any_related(**{test_description + 'test_modulation_type': 'S'}))
        # binding tests performed
        qs = qs.annotate(bindtest_av=any_related(**{test_description + 'test_modulation_type': 'B'}))

        # pharmacokinetic tests performed
        qs = qs.annotate(pktest_av=any_related(**{biblio + 'pharmacokinetic': True}))
        # cytotoxicity tests performed
        qs = qs.annotate(cytoxtest_av=any_related(**{biblio + 'cytotox': True}))
        # in silico study performed
        qs = qs.annotate(insilico_av=any_related(**{biblio + 'in_silico': True}))

        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
507

508
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
509
510
511
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
512
    objects = CompoundManager()
513
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
514
        verbose_name='Canonical Smiles',
515
516
        unique=True,
    )
517
518
519
520
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
521
    aromatic_ratio = models.DecimalField(
522
523
524
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
525
526
        blank=True,
        null=True,
527
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
528
    balaban_index = models.DecimalField(
529
530
531
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
532
533
        blank=True,
        null=True,
534
535
536
537
538
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
539
540
        blank=True,
        null=True,
541
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
542
    gc_molar_refractivity = models.DecimalField(
543
544
545
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
546
547
        blank=True,
        null=True,
548
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
549
    log_d = models.DecimalField(
550
551
552
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
553
554
        blank=True,
        null=True,
555
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
556
    a_log_p = models.DecimalField(
557
558
559
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
560
561
        blank=True,
        null=True,
562
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
563
    mean_atom_vol_vdw = models.DecimalField(
564
565
566
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
567
568
        blank=True,
        null=True,
569
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
570
    molecular_weight = models.DecimalField(
571
572
573
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
574
575
        blank=True,
        null=True,
576
577
578
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
579
580
        blank=True,
        null=True,
581
582
583
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
584
585
        blank=True,
        null=True,
586
587
588
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
589
590
        blank=True,
        null=True,
591
592
593
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
594
595
        blank=True,
        null=True,
596
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
597
    nb_aromatic_sssr = models.IntegerField(
598
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
599
600
        blank=True,
        null=True,
601
602
603
    )
    # Integer count descriptors (atoms, bonds, rings, ...); all of them are
    # optional (blank=True, null=True).
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
        blank=True,
        null=True,
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
        blank=True,
        null=True,
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
        blank=True,
        null=True,
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
        blank=True,
        null=True,
    )
    nb_bonds_non_h = models.IntegerField(
        verbose_name='Number of bonds not involving a hydrogen',
        blank=True,
        null=True,
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
        blank=True,
        null=True,
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
        blank=True,
        null=True,
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
        blank=True,
        null=True,
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
        blank=True,
        null=True,
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
        blank=True,
        null=True,
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
        blank=True,
        null=True,
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
        blank=True,
        null=True,
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
        blank=True,
        null=True,
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
        blank=True,
        null=True,
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
        blank=True,
        null=True,
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
        blank=True,
        null=True,
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
        blank=True,
        null=True,
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
        blank=True,
        null=True,
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
        blank=True,
        null=True,
    )
    # InChi / InChiKey identifiers, stored as free text and optional.
    inchi = models.TextField(
        verbose_name='InChi',
        blank=True,
        null=True,
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
717
    # Optional numeric descriptors. For the DecimalField ones, max_digits is
    # the total number of stored digits and decimal_places the number of
    # digits kept after the decimal point.
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
        blank=True,
        null=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
771
    # Names and identifiers in external resources; all optional. Only
    # common_name and chemspider_id are declared unique.
    common_name = models.CharField(
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
803

804
    class Meta:
        # default ordering of querysets: by the `id` field
        ordering = ['id']
806

807
808
809
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and the 15 best scoring
        matches are stored as DrugbankCompoundTanimoto rows (one bulk insert).
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound - confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        # the DrugBank compounds are loaded once and indexed by id, so that the
        # best matches can be linked below without one extra query per match
        drugbank_compounds = {c.id: c for c in DrugBankCompound.objects.all()}
        smiles_dict = {id_: c.canonical_smiles for id_, c in drugbank_compounds.items()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 highest scores only
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound=drugbank_compounds[id_],
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
825
826
827
    @property
    def biblio_refs(self):
        """
        return all RefCompoundBiblio related to this compound

        The result is the queryset produced by filtering RefCompoundBiblio
        on `compound=self`.
        """
        return RefCompoundBiblio.objects.filter(compound=self)
831

832
833
834
835
836
837
838
839
840
841
842
843
844
    @property
    def pfam_ids(self):
        """
        return the set of PFAM ids of the domains of the complexes bound in
        the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # return value is not used here; the call is kept as is
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

845
    @property
    def compound_action_ligand_ids(self):
        """
        return the set of ligand ids (`ligand_id`) of the compound actions
        of this compound

        NOTE(review): the previous docstring described these values as PDB
        codes - confirm what `ligand_id` actually holds.
        """
        return {ca.ligand_id for ca in self.compoundaction_set.all()}
854

855
856
    @property
    def best_pXC50_activity(self):
        """
        return the maximum `activity` value over the compound activity
        results of this compound (Max aggregate on `activity`)
        """
        return self.compoundactivityresult_set.aggregate(Max('activity'))['activity__max']
858
859
860
861
862
863

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return a compound activity result whose activity equals the best
        pXC50 activity, or None if there is no best activity

        When several results share the best activity, the first one returned
        by the filter is used.
        """
        best_pXC50_activity = self.best_pXC50_activity
        if best_pXC50_activity is None:
            return None
        return self.compoundactivityresult_set.filter(activity=best_pXC50_activity)[0]
865

866
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI corresponding to the best PXC50 activity

        Returns None when there is no best compound activity result.
        """
        best_activity_car = self.best_pXC50_compound_activity_result
        if best_activity_car is None:
            return None
        return best_activity_car.test_activity_description.ppi.name

877
878
879
880
881
882
883
884
885
886
887
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI corresponding to the best PXC50
        activity, None if there is no best compound activity result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            return top_result.test_activity_description.ppi.family.name
        return None

888
889
890
891
892
893
894
895
896
897
898
899
900
901
    @property
    def bioch_tests_count(self):
        """
        number of compound activity results of this compound whose test
        activity description has the 'BIOCH' test type
        """
        all_results = self.compoundactivityresult_set.all()
        biochemical = all_results.filter(test_activity_description__test_type='BIOCH')
        return biochemical.count()

    @property
    def cell_tests_count(self):
        """
        number of compound activity results of this compound whose test
        activity description has the 'CELL' test type
        """
        all_results = self.compoundactivityresult_set.all()
        cellular = all_results.filter(test_activity_description__test_type='CELL')
        return cellular.count()

902
903
904
905
906
907
    @property
    def families(self):
        """
        list of the distinct PPI families of the PPIs targeted by the
        compound actions of this compound
        """
        distinct_families = set()
        for action in self.compoundaction_set.all():
            distinct_families.add(action.ppi.family)
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
908

909
910
911
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugbankCompoundTanimoto entries of this compound, ordered by
        decreasing tanimoto value
        """
        by_decreasing_tanimoto = self.drugbankcompoundtanimoto_set.order_by('-tanimoto')
        return by_decreasing_tanimoto
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
912

913
914
915
    def autofill(self):
        """
        fill the InChi and InChiKey from the canonical SMILES of the
        compound, then compute the similarity to DrugBank compounds

        Note that compute_drugbank_compound_similarity() saves the compound.
        """
        # compute InChi and InChiKey
        self.inchi = smi2inchi(self.canonical_smile)
        self.inchikey = smi2inchikey(self.canonical_smile)
        self.compute_drugbank_compound_similarity()
918

919
920
921
    def __str__(self):
        """
        text representation of the compound, built from its id
        """
        template = 'Compound #{}'
        return template.format(self.id)

922
923
924
    def get_absolute_url(self):
        """
        URL of the 'compound_card' view for this compound, resolved with
        its primary key
        """
        url_kwargs = {'pk': self.pk}
        return reverse('compound_card', kwargs=url_kwargs)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
925

926
927
928
class CompoundTanimoto(models.Model):
    canonical_smiles = models.TextField(
        'Canonical Smile')
929
    fingerprint = mod