models.py 41.6 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.core.exceptions import ValidationError
12
from django.db import models, transaction
13
14
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
19
20
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
21

Hervé  MENAGER's avatar
Hervé MENAGER committed
22

23
24
25
26
27
28
29
30
31
32
33
34
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be populated from an
    external source.

    Subclasses implement autofill() to fetch and assign the data, and may
    override is_autofill_done() to report whether that data is already
    present. save() accepts an extra ``autofill`` keyword argument: when it
    is True, or when is_autofill_done() returns False, autofill() is called
    before the instance is saved.
    """

    class Meta:
        abstract = True

    def is_autofill_done(self):
        """Tell save() whether autofill() can be skipped (True by default)."""
        return True

    def autofill(self):
        """Populate the model fields; must be provided by subclasses."""
        raise NotImplementedError()

    def save(self, *args, **kwargs):
        """
        Save the instance, calling autofill() first when requested or needed.

        The ``autofill`` keyword is consumed here and is not forwarded to
        the parent save().
        """
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super().save(*args, **kwargs)

47
48

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI'),
    )
    # One compiled pattern per source code, used to validate ``id_source``.
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # Raw string, with the dot after the "10" prefix escaped so that only
        # a literal "." is accepted there.
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (PubMed, Google patents or DOI, depending on ``source``)

        Raises NotImplementedError when ``source`` is none of the known codes.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """Autofill is considered done once a non-empty title is set."""
        # bool() rather than len() so that a title of None counts as
        # "not filled" instead of raising TypeError.
        return bool(self.title)

    def clean(self):
        """Run the parent validation, then check id_source against source."""
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """Return True for the sources get_external_url() can build a link for."""
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        Return the PubMed or doi.org URL of this reference, or None for
        the other sources.
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check ``id_source`` against the pattern registered for ``source``.

        Returns True when it matches. Raises ValidationError (keyed on
        ``id_source``) when it does not, and KeyError when ``source`` has no
        registered pattern.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            raise ValidationError(
                dict(
                    # Interpolate AFTER the translation lookup: the msgid must
                    # be the constant template for a translation to be found.
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def data_and_study(self):
        """
        Return the title-cased verbose names of the data/study flags that
        are set on this reference, joined with ", ".
        """
        flag_names = (
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        )
        return ", ".join(
            self._meta.get_field(f).verbose_name.title()
            for f in flag_names
            if getattr(self, f, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
145

Hervé  MENAGER's avatar
Hervé MENAGER committed
146

147
class Taxonomy(AutoFillableModel):
    """
    Organism entry: an NCBI Taxonomy ID together with the
    corresponding human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        set the organism name from the scientific name returned by
        the taxonomy web service (NCBI Entrez) for this taxonomy ID
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
169

Hervé  MENAGER's avatar
Hervé MENAGER committed
170

171
class MolecularFunction(AutoFillableModel):
    """
    Molecular function: a Gene Ontology term ID and the
    corresponding human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(verbose_name='Gene Ontology ID', unique=True, max_length=10)
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """GO ID and description, separated by a single space."""
        return ' '.join((self.go_id, self.description))

    def is_autofill_done(self):
        """Autofill is done once the description is neither None nor empty."""
        return bool(self.description)

    def autofill(self):
        """
        set the description from the label returned by the
        Gene Ontology web service (EBI OLS) for this GO ID
        """
        self.description = get_go_info(self.go_id)['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
198

199
class Protein(AutoFillableModel):
    """
    Protein described by its Uniprot entry, linked to its organism,
    its molecular functions and its domains
    """
    uniprot_id = models.CharField(verbose_name='Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)',
        max_length=75,
    )
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(verbose_name='Gene name', unique=True, max_length=30)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

    def is_autofill_done(self):
        """Autofill is considered done once the gene name is non-empty."""
        return len(self.gene_name) > 0

    @transaction.atomic
    def autofill(self):
        """
        fill the protein from the Uniprot web service, creating the
        Taxonomy, MolecularFunction and Domain records it refers to
        when they do not exist yet
        """
        uniprot = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = uniprot['recommended_name']

        # use the gene name flagged as primary; the first listed name is
        # the fallback when none is
        names = uniprot['gene_names']
        self.gene_name = next(
            (entry["name"] for entry in names if entry["type"] == "primary"),
            names[0]['name'],
        )

        self.entry_name = uniprot['entry_name']
        self.short_name = uniprot['short_name']

        tax_id = uniprot['organism']
        try:
            organism = Taxonomy.objects.get(taxonomy_id=tax_id)
        except Taxonomy.DoesNotExist:
            # unknown organism: create it and have it fill its own name
            organism = Taxonomy(taxonomy_id=tax_id)
            organism.save(autofill=True)
        self.organism = organism

        # the protein is saved here, before the many-to-many links
        # below are added
        super().save()

        for go_id in uniprot['molecular_functions']:
            function = MolecularFunction.objects.get_or_create(go_id=go_id)[0]
            self.molecular_functions.add(function)

        for pfam_acc in uniprot['domains']:
            pfam_domain = Domain.objects.get_or_create(pfam_acc=pfam_acc)[0]
            self.domains.add(pfam_domain)

Hervé  MENAGER's avatar
Hervé MENAGER committed
257

258
class Domain(AutoFillableModel):
    """
    Protein domain, identified by its PFAM accession
    """
    pfam_acc = models.CharField(verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(verbose_name='Pfam Description', max_length=100)
    # TODO: what is this field? check database contents
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    def __str__(self):
        return '{} ({}-{})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)

    @property
    def name(self):
        """The Pfam family identifier."""
        return self.pfam_id

    def is_autofill_done(self):
        """Autofill is done once the Pfam identifier is neither None nor empty."""
        return bool(self.pfam_id)

    def autofill(self):
        """
        set the Pfam identifier and description from the data returned
        by the PFAM web service for this accession
        """
        pfam = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam['id']
        self.pfam_description = pfam['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
289

290
class ProteinDomainComplex(models.Model):
    """
    Association of a protein with one of its domains
    """
    protein = models.ForeignKey('Protein', models.CASCADE)
    domain = models.ForeignKey('Domain', models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        """Return the short name of the associated protein."""
        return self.protein.short_name

    def __str__(self):
        # built from the raw foreign key values, so no related object is loaded
        return '{}-{}'.format(self.protein_id, self.domain_id)
Hervé  MENAGER's avatar
Hervé MENAGER committed
307

308

309
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
319
320


321
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "partner complex" role
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
328

Hervé  MENAGER's avatar
Hervé MENAGER committed
329

330
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    def __str__(self):
        return '{} ({})'.format(self.code, self.description)

    class Meta:
        verbose_name_plural = "symmetries"

343
344

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
352

Hervé  MENAGER's avatar
Hervé MENAGER committed
353
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    def __str__(self):
        return self.name

    class Meta:
        verbose_name_plural = "PPI Families"
Hervé  MENAGER's avatar
Hervé MENAGER committed
364

Hervé  MENAGER's avatar
Hervé MENAGER committed
365

366
class Ppi(AutoFillableModel):
    """
    PPI (protein-protein interaction)

    The ``name`` field is derived from the short names of the proteins of
    the PPI complexes and stored; see autofill().
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    # NOTE(review): null has no effect on a ManyToManyField (Django system
    # check fields.W340); left as is so the field definition is unchanged.
    diseases = models.ManyToManyField(Disease, null=True, blank=True)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """Autofill is considered done once the stored name is not empty."""
        return self.name != ""

    def autofill(self):
        """Compute the PPI name and store it on the instance."""
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins.

        The result is the bound protein names joined with ",", followed by
        " / " and the partner protein names (those of the PPI complexes that
        are not bound complexes) when there are any. Names are sorted inside
        each group so that the same PPI always yields the same name; joining
        the sets directly would give an arbitrary order.
        """
        # select_related fetches each complex and its protein in the same
        # query instead of issuing two extra queries per PPI complex
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        partner_protein_names = all_protein_names - bound_protein_names
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
406

Hervé  MENAGER's avatar
Hervé MENAGER committed
407

Hervé  MENAGER's avatar
Hervé MENAGER committed
408
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(Ppi, models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, models.CASCADE)
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    def __str__(self):
        return 'PPI {}, Complex {} ({})'.format(self.ppi, self.complex, self.cc_nb)

    class Meta:
        verbose_name_plural = "Ppi complexes"

Hervé  MENAGER's avatar
Hervé MENAGER committed
425

426
class CompoundManager(models.Manager):
    """
    Manager whose queryset carries a fixed set of extra annotations
    (counts, best activity, rule-based flags, test availability), used for
    filters and compound card
    """

    @staticmethod
    def _flag(*conditions, **lookups):
        """Boolean expression: True for rows matching the condition, else False."""
        return Case(
            When(*conditions, then=True, **lookups),
            default=False,
            output_field=BooleanField(),
        )

    @staticmethod
    def _any_related(**lookups):
        """
        Boolean expression: maximum over the joined rows of a 1/0 match
        indicator, cast to a boolean.
        """
        return Cast(
            Max(Case(When(then=1, **lookups), default=0, output_field=IntegerField())),
            BooleanField(),
        )

    def get_queryset(self):
        """
        Return the default queryset with every annotation added.

        The annotate() calls are kept separate and in this order because
        later annotations refer to earlier ones by name.
        """
        flag = self._flag
        any_related = self._any_related

        qs = super().get_queryset()
        # number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # best activity
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # LE
        qs = qs.annotate(
            le=Cast(
                1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'),
                FloatField(),
            )
        )
        # LLE
        qs = qs.annotate(
            lle=Cast(
                Max('compoundactivityresult__activity') - F('a_log_p'),
                FloatField(),
            )
        )

        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=flag(molecular_weight__lte=500))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=flag(nb_acceptor_h__lte=10))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=flag(nb_donor_h__lte=5))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=flag(a_log_p__lte=5))
        # Lipinsky global: number of satisfied criteria, at least 3 required
        qs = qs.annotate(
            lipinsky_score=(
                Cast(F('lipinsky_mw'), IntegerField())
                + Cast(F('lipinsky_hba'), IntegerField())
                + Cast(F('lipinsky_hbd'), IntegerField())
                + Cast(F('lipinsky_a_log_p'), IntegerField())
            )
        )
        qs = qs.annotate(lipinsky=flag(lipinsky_score__gte=3))

        # Veber hba_hbd (<=12)
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h') + F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=flag(hba_hbd__lte=12))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=flag(tpsa__lte=140))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=flag(nb_rotatable_bonds__lte=10))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa)); the thresholds
        # are tested again here rather than combining the veber_* annotations
        qs = qs.annotate(
            veber=flag(
                Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140)))
            )
        )

        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=flag(a_log_p__lte=3))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=flag(tpsa__gte=75))
        # Pfizer global (AlogP and TPSA); the thresholds are tested again
        # here rather than combining the pfizer_* annotations
        qs = qs.annotate(pfizer=flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75))))

        # PDB ligand available
        qs = qs.annotate(pdb_ligand_av=any_related(compoundaction__ligand_id__isnull=False))

        # NOTE(review): unlike the *_av annotations below, the three role
        # annotations are not aggregated with Max over the related rows.
        # inhibition role
        qs = qs.annotate(inhibition_role=flag(compoundactivityresult__modulation_type='I'))
        # binding role
        qs = qs.annotate(binding_role=flag(compoundactivityresult__modulation_type='B'))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=flag(compoundactivityresult__modulation_type='S'))

        # cellular tests performed
        qs = qs.annotate(
            celltest_av=any_related(
                compoundactivityresult__test_activity_description__test_type='CELL'
            )
        )
        # inhibition tests performed
        qs = qs.annotate(
            inhitest_av=any_related(
                compoundactivityresult__test_activity_description__test_modulation_type='I'
            )
        )
        # stabilisation tests performed
        qs = qs.annotate(
            stabtest_av=any_related(
                compoundactivityresult__test_activity_description__test_modulation_type='S'
            )
        )
        # binding tests performed
        qs = qs.annotate(
            bindtest_av=any_related(
                compoundactivityresult__test_activity_description__test_modulation_type='B'
            )
        )
        # pharmacokinetic tests performed
        qs = qs.annotate(
            pktest_av=any_related(refcompoundbiblio__bibliography__pharmacokinetic=True)
        )
        # cytotoxicity tests performed
        qs = qs.annotate(
            cytoxtest_av=any_related(refcompoundbiblio__bibliography__cytotox=True)
        )
        # in silico study performed
        qs = qs.annotate(
            insilico_av=any_related(refcompoundbiblio__bibliography__in_silico=True)
        )
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
499

500
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
501
502
503
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
504
    objects = CompoundManager()
505
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
506
        verbose_name='Canonical Smiles',
507
508
        unique=True,
    )
509
510
511
512
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
513
    aromatic_ratio = models.DecimalField(
514
515
516
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
517
518
        blank=True,
        null=True,
519
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
520
    balaban_index = models.DecimalField(
521
522
523
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
524
525
        blank=True,
        null=True,
526
527
528
529
530
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
531
532
        blank=True,
        null=True,
533
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
534
    gc_molar_refractivity = models.DecimalField(
535
536
537
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
538
539
        blank=True,
        null=True,
540
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
541
    log_d = models.DecimalField(
542
543
544
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
545
546
        blank=True,
        null=True,
547
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
548
    a_log_p = models.DecimalField(
549
550
551
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
552
553
        blank=True,
        null=True,
554
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
555
    mean_atom_vol_vdw = models.DecimalField(
556
557
558
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
559
560
        blank=True,
        null=True,
561
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
562
    molecular_weight = models.DecimalField(
563
564
565
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
566
567
        blank=True,
        null=True,
568
569
570
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
571
572
        blank=True,
        null=True,
573
574
575
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
576
577
        blank=True,
        null=True,
578
579
580
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
581
582
        blank=True,
        null=True,
583
584
585
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
586
587
        blank=True,
        null=True,
588
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
589
    nb_aromatic_sssr = models.IntegerField(
590
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
591
592
        blank=True,
        null=True,
593
594
595
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
596
597
        blank=True,
        null=True,
598
599
600
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
601
602
        blank=True,
        null=True,
603
604
605
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
606
607
        blank=True,
        null=True,
608
609
610
    )
    # Integer count descriptors of the compound (bonds, atoms per element,
    # rings, ...). All of them are optional: blank and NULL are allowed.
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
        blank=True,
        null=True,
    )
    nb_bonds_non_h = models.IntegerField(
        verbose_name='Number of bonds not involving a hydrogen',
        blank=True,
        null=True,
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
        blank=True,
        null=True,
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
        blank=True,
        null=True,
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
        blank=True,
        null=True,
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
        blank=True,
        null=True,
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
        blank=True,
        null=True,
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
        blank=True,
        null=True,
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
        blank=True,
        null=True,
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
        blank=True,
        null=True,
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
        blank=True,
        null=True,
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
        blank=True,
        null=True,
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
        blank=True,
        null=True,
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
        blank=True,
        null=True,
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
        blank=True,
        null=True,
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
        blank=True,
        null=True,
    )
    # InChI identifiers of the compound. Both are optional at the database
    # level; autofill() assigns them from the canonical SMILES.
    inchi = models.TextField(
        verbose_name='InChi',
        blank=True,
        null=True,
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
709
    # Fixed-precision descriptors. max_digits / decimal_places bound the
    # storable range (e.g. max_digits=4, decimal_places=2 holds up to 99.99).
    # All of them are optional: blank and NULL are allowed.
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
        blank=True,
        null=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
763
    # Names and identifiers in external resources. All of them are optional;
    # common_name and chemspider_id must be unique when they are set.
    common_name = models.CharField(
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
795

796
    class Meta:
        # default ordering of querysets: ascending id
        ordering = ['id']
798

799
800
801
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and one
        DrugbankCompoundTanimoto row is created for each of the 15
        DrugBank compounds with the highest Tanimoto value.
        """
        # presumably saved first so that the rows created below can
        # reference this compound -- TODO confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best matches, highest Tanimoto value first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them with a single insert; the DrugBank compound is referenced
        # by its id directly, which avoids one SELECT per retained match
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=drugbank_compound_id,
                tanimoto=tanimoto,
            )
            for drugbank_compound_id, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
817
818
819
    @property
    def biblio_refs(self):
        """
        return all RefCompoundBiblio related to this compound

        The result is the queryset of RefCompoundBiblio filtered on
        compound=self.
        """
        return RefCompoundBiblio.objects.filter(compound=self)
823

824
825
826
827
828
829
830
831
832
833
834
835
836
    @property
    def pfam_ids(self):
        """
        Set of the PFAM ids of the domains of the bound complexes of the
        PPIs targeted by the compound actions of this compound.
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # NOTE(review): the returned value is not used here -- confirm
            # whether this call is needed for a side effect
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

837
    @property
    def compound_action_ligand_ids(self):
        """
        return all PDB codes of the corresponding compound actions

        The result is the set of the distinct ligand_id values of the
        compound actions of this compound.
        """
        return {ca.ligand_id for ca in self.compoundaction_set.all()}
846

847
848
    @property
    def best_pXC50_activity(self):
        """
        return the highest 'activity' value among the compound activity
        results of this compound (None when the aggregate has no value)
        """
        return self.compoundactivityresult_set.aggregate(Max('activity'))['activity__max']
850
851
852
853
854
855

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return the first compound activity result whose activity is equal
        to the best pXC50 activity, or None if there is no best activity
        """
        best_pXC50_activity = self.best_pXC50_activity
        if best_pXC50_activity is None:
            return None
        return self.compoundactivityresult_set.filter(activity=best_pXC50_activity)[0]
857

858
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI corresponding to the best PXC50 activity

        None is returned when there is no best compound activity result.
        """
        best_activity_car = self.best_pXC50_compound_activity_result
        if best_activity_car is None:
            return None
        return best_activity_car.test_activity_description.ppi.name

869
870
871
872
873
874
875
876
877
878
879
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI tested in the compound activity
        result holding the best PXC50 activity, None if there is no such
        result.
        """
        best_result = self.best_pXC50_compound_activity_result
        if best_result is not None:
            return best_result.test_activity_description.ppi.family.name
        return None

880
881
882
883
884
885
886
887
888
889
890
891
892
893
    @property
    def bioch_tests_count(self):
        """
        Number of compound activity results of this compound whose test
        activity description has the 'BIOCH' test type.
        """
        biochemical_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='BIOCH'
        )
        return biochemical_results.count()

    @property
    def cell_tests_count(self):
        """
        Number of compound activity results of this compound whose test
        activity description has the 'CELL' test type.
        """
        cell_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='CELL'
        )
        return cell_results.count()

894
895
896
897
898
899
    @property
    def families(self):
        """
        List of the distinct families of the PPIs targeted by the compound
        actions of this compound (each family appears once).
        """
        distinct_families = set()
        for action in self.compoundaction_set.all():
            distinct_families.add(action.ppi.family)
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
900

901
902
903
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugBank compound Tanimoto rows related to this compound, ordered
        by decreasing tanimoto value.
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
904

905
906
907
    def autofill(self):
        """
        fill the InChi and InChiKey from the canonical SMILES, then compute
        the similarity to the DrugBank compounds
        """
        # compute InChi and InChiKey
        self.inchi = smi2inchi(self.canonical_smile)
        self.inchikey = smi2inchikey(self.canonical_smile)
        # note: this call saves the compound before creating the similarity rows
        self.compute_drugbank_compound_similarity()
910

911
912
913
    def __str__(self):
        """Label of the compound, built from its id."""
        template = 'Compound #{}'
        return template.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
914

915
916
917
class CompoundTanimoto(models.Model):
    """
    Tanimoto value stored for a compound, a canonical SMILES and a
    fingerprint; the combination of the three must be unique.
    """
    # NOTE(review): presumably the SMILES of the query the compound was
    # compared to -- confirm against create_tanimoto()
    canonical_smiles = models.TextField(
        verbose_name='Canonical Smile',
    )
    fingerprint = models.TextField(
        verbose_name='Fingerprint',
    )
    # rows are deleted together with the compound they refer to
    compound = models.ForeignKey(
        Compound,
        on_delete=models.CASCADE,
    )
    # up to 5 digits, 4 of them after the decimal point
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # a single uniqueness constraint over the three columns, written as
        # an explicit tuple of tuples
        unique_together = (
            ('canonical_smiles', 'fingerprint', 'compound'),
        )
926

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
927

928
def create_tanimoto(smiles_query, fingerprint):
Hervé  MENAGER's avatar
Hervé MENAGER committed
929
930
931
932