models.py 42.3 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
15
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
29
30
31
32
33
34
35
36
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be populated from external
    sources by their autofill() method.

    save() accepts an extra ``autofill`` keyword argument: if it is set to
    True, or if is_autofill_done() reports False, autofill() is called
    before the instance is saved; otherwise the instance is saved as is.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() first when requested or needed.

        The ``autofill`` keyword argument is consumed here, every other
        argument is forwarded untouched to the parent save().
        """
        requested = kwargs.pop('autofill', None)
        if requested is True or not self.is_autofill_done():
            self.autofill()
        super(AutoFillableModel, self).save(*args, **kwargs)

    def autofill(self):
        """
        Fill the fields of the instance; has to be implemented by subclasses.
        """
        raise NotImplementedError()

    def is_autofill_done(self):
        """
        Tell whether the instance is already filled; True unless overridden.
        """
        return True

49
50

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # pattern the id_source has to match, keyed by source code
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # raw string, and the dot after the "10" prefix is a literal dot
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI, depending on the source)

        Raises NotImplementedError when the source is none of PM, PT, DO.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled as soon as it has a title
        (a missing title, None or '', means autofill is still to be done)
        """
        return bool(self.title)

    def clean(self):
        """
        Model validation: id_source has to match the pattern of the source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        True for the sources get_external_url() can build a link for
        """
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        URL of the reference on PubMed or doi.org, None for other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when it matches, raises ValidationError (attached to
        the id_source field) when it does not, and KeyError when no pattern
        is registered for source.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            # interpolate after the translation lookup, so that the lookup
            # key is the constant message and not the formatted one
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def data_and_study(self):
        """
        Comma separated, title-cased verbose names of the data/study flags
        which are set on this reference
        """
        flags = (
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        )
        return ", ".join(
            self._meta.get_field(flag).verbose_name.title()
            for flag in flags
            if getattr(self, flag, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
147

Hervé  MENAGER's avatar
Hervé MENAGER committed
148

149
class Taxonomy(AutoFillableModel):
    """
    Organism, identified by its NCBI Taxonomy ID and stored together
    with its human-readable name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        set the name to the scientific name returned by
        get_taxonomy_info() for this taxonomy ID
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
171

Hervé  MENAGER's avatar
Hervé MENAGER committed
172

173
class MolecularFunction(AutoFillableModel):
    """
    Molecular function, identified by its Gene Ontology ID and stored
    together with its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(verbose_name='Gene Ontology ID', unique=True, max_length=10)
    description = models.CharField(verbose_name='description', max_length=500)

    def autofill(self):
        """
        set the description to the label returned by get_go_info()
        for this GO ID
        """
        self.description = get_go_info(self.go_id)['label']

    def is_autofill_done(self):
        """
        filled as soon as the description is neither None nor empty
        """
        return bool(self.description)

    @property
    def name(self):
        """
        GO ID and description, separated by a space
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
200

201
class Protein(AutoFillableModel):
    """
    Protein, identified by its Uniprot ID and stored together with its
    names, its organism, its molecular functions and its domains
    """
    uniprot_id = models.CharField(verbose_name='Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)',
        max_length=75,
    )
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(verbose_name='Gene name', unique=True, max_length=30)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', on_delete=models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fill the protein from get_uniprot_info(), creating the Taxonomy,
        MolecularFunction and Domain entries which do not exist yet

        The protein itself is saved in here, after its plain fields and
        organism are set and before its many-to-many relations are filled.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']

        candidates = info['gene_names']
        # the first listed name is the fallback ...
        self.gene_name = candidates[0]['name']
        # ... the first name flagged as primary wins when there is one
        primary = next(
            (candidate for candidate in candidates if candidate["type"] == "primary"),
            None,
        )
        if primary is not None:
            self.gene_name = primary["name"]

        self.entry_name = info['entry_name']
        self.short_name = info['short_name']

        try:
            organism = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            organism = Taxonomy(taxonomy_id=info['organism'])
            organism.save(autofill=True)
        self.organism = organism
        super(Protein, self).save()

        for go_id in info['molecular_functions']:
            self.molecular_functions.add(
                MolecularFunction.objects.get_or_create(go_id=go_id)[0]
            )

        for pfam_acc in info['domains']:
            self.domains.add(
                Domain.objects.get_or_create(pfam_acc=pfam_acc)[0]
            )

    def is_autofill_done(self):
        """
        filled as soon as the gene name is not empty
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
259

260
class Domain(AutoFillableModel):
    """
    Protein domain, identified by its Pfam accession and stored together
    with its Pfam identifier and description
    """
    pfam_acc = models.CharField(verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(verbose_name='Pfam Description', max_length=100)
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    # TODO: what is this field? check database
    # contents

    def autofill(self):
        """
        set the Pfam identifier and description to the values returned
        by get_pfam_info() for this accession
        """
        pfam = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam['id']
        self.pfam_description = pfam['description']

    def is_autofill_done(self):
        """
        filled as soon as the Pfam identifier is neither None nor empty
        """
        return bool(self.pfam_id)

    @property
    def name(self):
        """
        the Pfam identifier
        """
        return self.pfam_id

    def __str__(self):
        return '{} ({}-{})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)

Hervé  MENAGER's avatar
Hervé MENAGER committed
291

292
class ProteinDomainComplex(models.Model):
    """
    Association of a protein with one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # built from the raw foreign keys, no related object is loaded
        return '{}-{}'.format(self.protein_id, self.domain_id)

    def name(self):
        """
        short name of the protein of the complex
        """
        return self.protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
309

310

311
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    ProteinDomainComplex playing the "bound complex" role
    """
    # the verbose name is a translation key, resolved lazily
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
321
322


323
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    ProteinDomainComplex playing the "partner complex" role;
    it adds no field of its own
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
330

Hervé  MENAGER's avatar
Hervé MENAGER committed
331

332
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{} ({})'.format(self.code, self.description)

345
346

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    # is there any database/nomenclature for diseases?

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
354

Hervé  MENAGER's avatar
Hervé MENAGER committed
355
class PpiFamily(models.Model):
    """
    PPI family, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
366

Hervé  MENAGER's avatar
Hervé MENAGER committed
367

368
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease, blank=True)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """
        the name is recomputed on save only when it is the empty string
        """
        # NOTE(review): a None name (the field is nullable) counts as
        # "done" here, so it is never computed by a plain save() - confirm
        # this is intended
        return self.name != ""

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        build the name of the PPI from the short names of its proteins:
        the proteins of the bound complexes, comma separated, followed when
        there are any by ' / ' and the other proteins, comma separated

        Names are sorted within each group, so that the same set of
        complexes always yields the same name.
        """
        # complex and protein are fetched with the PpiComplex rows rather
        # than with two extra queries per row
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        partner_protein_names = all_protein_names - bound_protein_names
        # sets are unordered: sort to get a reproducible name
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
408

Hervé  MENAGER's avatar
Hervé MENAGER committed
409

Hervé  MENAGER's avatar
Hervé MENAGER committed
410
class PpiComplex(models.Model):
    """
    Participation of a protein-domain complex in a PPI
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    # the verbose name is a translation key, resolved lazily
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI {}, Complex {} ({})'.format(self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
427

428
class CompoundManager(models.Manager):
    """
    Manager which automatically decorates the results of every database
    query with a number of annotations, used for filters and compound card
    """

    def get_queryset(self):
        """
        return the parent queryset with all the annotations applied

        Annotations are applied one annotate() call at a time, in the order
        of the table below: some of them refer to previous ones by name.
        """

        def flag(condition=None, **lookups):
            # boolean annotation: True for the rows matching the condition
            return Case(
                When(condition, then=True, **lookups),
                default=False,
                output_field=BooleanField(),
            )

        def any_related(**lookups):
            # boolean annotation, computed as the maximum of a 0/1 marker
            # over the joined rows and cast back to a boolean
            return Cast(
                Max(Case(When(then=1, **lookups), default=0, output_field=IntegerField())),
                BooleanField(),
            )

        def as_int(annotation_name):
            # integer value of a previously declared annotation
            return Cast(F(annotation_name), IntegerField())

        annotations = [
            # number of publications
            ('pubs', Count('refcompoundbiblio', distinct=True)),
            # best activity
            ('best_activity', Max('compoundactivityresult__activity')),
            # LE
            ('le', Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField())),
            # LLE
            ('lle', Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField())),
            # Lipinski MW (<=500)
            ('lipinsky_mw', flag(molecular_weight__lte=500)),
            # Lipinski hba (<=10)
            ('lipinsky_hba', flag(nb_acceptor_h__lte=10)),
            # Lipinski hbd (<=5)
            ('lipinsky_hbd', flag(nb_donor_h__lte=5)),
            # Lipinski a_log_p (<=5)
            ('lipinsky_a_log_p', flag(a_log_p__lte=5)),
            # Lipinski global: at least 3 of the 4 criteria above
            (
                'lipinsky_score',
                as_int('lipinsky_mw') + as_int('lipinsky_hba')
                + as_int('lipinsky_hbd') + as_int('lipinsky_a_log_p'),
            ),
            ('lipinsky', flag(lipinsky_score__gte=3)),
            # Veber hba_hbd (<=12)
            ('hba_hbd', F('nb_acceptor_h') + F('nb_donor_h')),
            ('veber_hba_hbd', flag(hba_hbd__lte=12)),
            # Veber TPSA (<=140)
            ('veber_tpsa', flag(tpsa__lte=140)),
            # Veber Rotatable Bonds (<=10)
            ('veber_rb', flag(nb_rotatable_bonds__lte=10)),
            # Veber global (Rotatable bonds and (hba_hbd or tpsa)), written
            # on the columns themselves rather than on the veber_* flags
            (
                'veber',
                flag(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140)))),
            ),
            # Pfizer AlogP (<=3)
            ('pfizer_a_log_p', flag(a_log_p__lte=3)),
            # Pfizer TPSA (>=75)
            ('pfizer_tpsa', flag(tpsa__gte=75)),
            # Pfizer global (AlogP and TPSA), written on the columns
            # themselves rather than on the pfizer_* flags
            ('pfizer', flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))),
            # PDB ligand available
            ('pdb_ligand_av', any_related(compoundaction__ligand_id__isnull=False)),
            # NOTE(review): unlike the *_av annotations, the three roles
            # below are not aggregated over the joined activity results -
            # confirm this is intended
            # inhibition role
            ('inhibition_role', flag(compoundactivityresult__modulation_type='I')),
            # binding role
            ('binding_role', flag(compoundactivityresult__modulation_type='B')),
            # stabilisation role
            ('stabilisation_role', flag(compoundactivityresult__modulation_type='S')),
            # cellular tests performed
            (
                'celltest_av',
                any_related(compoundactivityresult__test_activity_description__test_type='CELL'),
            ),
            # inhibition tests performed
            (
                'inhitest_av',
                any_related(compoundactivityresult__test_activity_description__test_modulation_type='I'),
            ),
            # stabilisation tests performed
            (
                'stabtest_av',
                any_related(compoundactivityresult__test_activity_description__test_modulation_type='S'),
            ),
            # binding tests performed
            (
                'bindtest_av',
                any_related(compoundactivityresult__test_activity_description__test_modulation_type='B'),
            ),
            # pharmacokinetic tests performed
            ('pktest_av', any_related(refcompoundbiblio__bibliography__pharmacokinetic=True)),
            # cytotoxicity tests performed
            ('cytoxtest_av', any_related(refcompoundbiblio__bibliography__cytotox=True)),
            # in silico study performed
            ('insilico_av', any_related(refcompoundbiblio__bibliography__in_silico=True)),
            # number of tests available
            ('tests_av', Count('compoundactivityresult', distinct=True)),
        ]

        qs = super().get_queryset()
        for alias, expression in annotations:
            qs = qs.annotate(**{alias: expression})
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
501

502
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
503
504
505
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
506
    objects = CompoundManager()
507
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
508
        verbose_name='Canonical Smiles',
509
510
        unique=True,
    )
511
512
513
514
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
515
    aromatic_ratio = models.DecimalField(
516
517
518
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
519
520
        blank=True,
        null=True,
521
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
522
    balaban_index = models.DecimalField(
523
524
525
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
526
527
        blank=True,
        null=True,
528
529
530
531
532
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
533
534
        blank=True,
        null=True,
535
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
536
    gc_molar_refractivity = models.DecimalField(
537
538
539
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
540
541
        blank=True,
        null=True,
542
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
543
    log_d = models.DecimalField(
544
545
546
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
547
548
        blank=True,
        null=True,
549
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
550
    a_log_p = models.DecimalField(
551
552
553
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
554
555
        blank=True,
        null=True,
556
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
557
    mean_atom_vol_vdw = models.DecimalField(
558
559
560
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
561
562
        blank=True,
        null=True,
563
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
564
    molecular_weight = models.DecimalField(
565
566
567
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
568
569
        blank=True,
        null=True,
570
571
572
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
573
574
        blank=True,
        null=True,
575
576
577
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
578
579
        blank=True,
        null=True,
580
581
582
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
583
584
        blank=True,
        null=True,
585
586
587
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
588
589
        blank=True,
        null=True,
590
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
591
    nb_aromatic_sssr = models.IntegerField(
592
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
593
594
        blank=True,
        null=True,
595
596
597
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
598
599
        blank=True,
        null=True,
600
601
602
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
603
604
        blank=True,
        null=True,
605
606
607
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
608
609
        blank=True,
        null=True,
610
611
612
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
613
614
        blank=True,
        null=True,
615
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
616
    nb_bonds_non_h = models.IntegerField(
617
        verbose_name='Number of bonds not involving a hydrogen',
618
619
        blank=True,
        null=True,
620
621
622
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
623
624
        blank=True,
        null=True,
625
626
627
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
628
629
        blank=True,
        null=True,
630
631
632
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
633
634
        blank=True,
        null=True,
635
636
637
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
638
639
        blank=True,
        null=True,
640
641
642
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
643
644
        blank=True,
        null=True,
645
646
647
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
648
649
        blank=True,
        null=True,
650
651
652
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
653
654
        blank=True,
        null=True,
655
656
657
    )
    # Optional integer descriptors (continued): H-bond donors, bond kinds, atom and ring counts
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors', null=True, blank=True)
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds', null=True, blank=True)
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms', null=True, blank=True)
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms', null=True, blank=True)
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds', null=True, blank=True)
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms', null=True, blank=True)
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms', null=True, blank=True)
    nb_rings = models.IntegerField(
        verbose_name='Number of rings', null=True, blank=True)
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds', null=True, blank=True)
    # InChi / InChiKey text identifiers; both are assigned by autofill()
    inchi = models.TextField(
        verbose_name='InChi', null=True, blank=True)
    inchikey = models.TextField(
        verbose_name='InChiKey', null=True, blank=True)
Hervé  MENAGER's avatar
Hervé MENAGER committed
711
    # Optional numeric descriptors; decimals are stored with two decimal places
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4, decimal_places=2, null=True, blank=True)
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5, decimal_places=2, null=True, blank=True)
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3, decimal_places=2, null=True, blank=True)
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5, decimal_places=2, null=True, blank=True)
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6, decimal_places=2, null=True, blank=True)
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5, decimal_places=2, null=True, blank=True)
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4, decimal_places=2, null=True, blank=True)
    wiener_index = models.IntegerField(
        verbose_name='Wiener index', null=True, blank=True)
Hervé  MENAGER's avatar
Hervé MENAGER committed
765
    # Optional names and external database identifiers;
    # common_name and chemspider_id are additionally declared unique
    common_name = models.CharField(
        verbose_name='Common name',
        max_length=20, unique=True, null=True, blank=True)
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10, null=True, blank=True)
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        max_length=10, unique=True, null=True, blank=True)
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30, null=True, blank=True)
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255, null=True, blank=True)
797

798
    class Meta:
        # default queryset ordering: ascending id
        ordering = ['id']
800

801
802
803
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared with the
        canonical SMILES of every DrugBankCompound, and the 15 highest
        Tanimoto values are stored as DrugbankCompoundTanimoto rows with a
        single bulk insert.
        """
        # presumably saved first so that the rows created below can reference it -- confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        # only the id and the SMILES are needed: read these two columns
        # rather than instantiating every DrugBankCompound
        smiles_dict = dict(DrugBankCompound.objects.values_list('id', 'canonical_smiles'))
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best scores, highest first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them; the DrugBank compound is referenced through its id,
        # which avoids one SELECT per stored row
        # NOTE(review): rows already stored for this compound are not removed
        # here -- confirm this is only run once per compound
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
819
820
821
    @property
    def biblio_refs(self):
        """
        return the RefCompoundBiblio entries whose compound is this one
        """
        refs = RefCompoundBiblio.objects.filter(compound=self)
        return refs
825

826
827
828
829
830
831
832
833
834
835
836
837
838
    @property
    def pfam_ids(self):
        """
        return the set of PFAM ids of the domains of the bound complexes
        of the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # NOTE(review): the returned value is discarded -- confirm whether
            # this call is needed for a side effect
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

839
    @property
    def compound_action_ligand_ids(self):
        """
        return the set of ligand ids of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
848

849
850
    @property
    def best_pXC50_activity(self):
        """
        return the maximum `activity` over the activity results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
852
853
854
855
856
857

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return the first activity result whose activity equals the best
        pXC50 activity, or None when there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
859

860
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        return the name of the PPI of the test activity description of the
        best pXC50 activity result, or None when there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.name

871
872
873
874
875
876
877
878
879
880
881
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        return the name of the family of the PPI of the test activity description
        of the best pXC50 activity result, or None when there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.family.name

882
883
884
885
886
887
888
889
890
891
892
893
894
895
    @property
    def bioch_tests_count(self):
        """
        count the activity results whose test activity description has test type 'BIOCH'
        """
        bioch_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='BIOCH',
        )
        return bioch_results.count()

    @property
    def cell_tests_count(self):
        """
        count the activity results whose test activity description has test type 'CELL'
        """
        cell_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='CELL',
        )
        return cell_results.count()

896
897
898
899
900
901
    @property
    def families(self):
        """
        return the list of distinct families of the PPIs of the compound
        actions of this compound (the order of the list is not defined)
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
902

903
904
905
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        return the DrugbankCompoundTanimoto rows of this compound, highest tanimoto first
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
906

907
908
909
    def autofill(self):
        """
        set inchi and inchikey from the canonical SMILES, then compute
        the similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        self.compute_drugbank_compound_similarity()
912

913
914
915
    def __str__(self):
        """
        return the display label of the compound, built from its id
        """
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
916

917
918
919
class CompoundTanimoto(models.Model):
    """
    Tanimoto value stored for a compound, together with the canonical SMILES
    and the fingerprint it is associated with
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # one row per (canonical_smiles, fingerprint, compound) combination
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
928

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
929