models.py 43.3 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
15
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
29
30
31
32
33
34
35
36
class AutoFillableModel(models.Model):
    """
    Abstract base model whose fields can be filled automatically from
    external sources.

    Subclasses implement autofill() to fetch and set the field values.
    save() runs autofill() first when either the ``autofill`` keyword
    argument is True or is_autofill_done() reports that the instance has not
    been filled yet; otherwise it saves the instance as is.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        save the instance, running the autofill hooks around the actual save
        when needed

        :param autofill: optional keyword argument, set it to True to force
            autofill(); it is removed before calling the parent save()
        """
        # is_autofill_done() is evaluated first, whatever the value of the
        # autofill keyword argument
        not_filled_yet = not self.is_autofill_done()
        explicitly_requested = kwargs.pop('autofill', None) is True
        run_autofill = explicitly_requested or not_filled_yet
        if run_autofill:
            self.autofill()
        super().save(*args, **kwargs)
        if run_autofill:
            self.autofill_post_save()

    def autofill(self):
        """
        fill the fields of the instance, to be implemented by subclasses
        """
        raise NotImplementedError()

    def autofill_post_save(self):
        """
        hook called once the save is done when autofill() has been run,
        useful for setting m2m relations; does nothing by default
        :return:
        """
        pass

    def is_autofill_done(self):
        """
        tell whether the instance is already filled; always True by default,
        so that autofill() only runs when explicitly requested
        """
        return True

60
61

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)

    The title, journal_name, authors_list and biblio_year fields are set by
    autofill() from the external service matching the source field.
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # one compiled pattern per source code, used to validate id_source;
    # raw strings so that regex escapes are not parsed as string escapes
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # the dot following the "10" prefix is escaped so that only a
        # literal dot is accepted
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI lookup, depending on the source)

        :raises NotImplementedError: if the source is not a known code
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        the reference is considered filled once it has a non-empty title;
        a missing (None) title counts as not filled
        """
        return bool(self.title)

    def clean(self):
        """
        validate the model, checking id_source against the pattern of the
        selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        tell whether get_external_url() returns a URL for this source
        """
        return self.source in ('PM', 'DO')

    def get_external_url(self):
        """
        return the URL of the reference on the PubMed or DOI web site, or
        None for the other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        check that id_source matches the pattern registered for source

        :param id_source: the bibliographic identifier to check
        :param source: the source code, a key of id_source_validators
        :return: True if the identifier is valid
        :raises ValidationError: if the identifier does not match the pattern
        :raises KeyError: if source is not a key of id_source_validators
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            raise ValidationError(
                dict(
                    # the pattern is interpolated after the translation
                    # lookup, so that the message template is the msgid
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def data_and_study(self):
        """
        return a comma separated list of the title-cased verbose names of the
        data/study flags which are set on this reference
        """
        flag_names = [
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        ]
        return ", ".join(
            self._meta.get_field(f).verbose_name.title()
            for f in flag_names
            if getattr(self, f, False)
        )

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)

    def get_absolute_url(self):
        """
        return the URL of the 'biblio-view' page of this reference
        """
        return reverse('biblio-view', kwargs={'biblio_pk': self.pk})

Hervé  MENAGER's avatar
Hervé MENAGER committed
162

163
class Taxonomy(AutoFillableModel):
    """
    Organism identified by its NCBI Taxonomy ID,
    together with its human-readable name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        set the name from the 'scientific_name' entry returned by
        get_taxonomy_info() for the taxonomy ID
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
185

Hervé  MENAGER's avatar
Hervé MENAGER committed
186

187
class MolecularFunction(AutoFillableModel):
    """
    Molecular function identified by its Gene Ontology ID,
    together with its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(verbose_name='Gene Ontology ID', unique=True, max_length=10)
    description = models.CharField(verbose_name='description', max_length=500)

    def autofill(self):
        """
        set the description from the 'label' entry returned by get_go_info()
        for the GO ID
        """
        self.description = get_go_info(self.go_id)['label']

    def is_autofill_done(self):
        """
        the function is considered filled once its description is set and
        not empty
        """
        if self.description is None:
            return False
        return len(self.description) > 0

    @property
    def name(self):
        """
        GO ID and description, separated by a space
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
214

215
class Protein(AutoFillableModel):
    """
    Protein identified by its Uniprot ID, with its names, organism,
    molecular functions and domains
    """
    uniprot_id = models.CharField(verbose_name='Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)',
        max_length=75,
    )
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(verbose_name='Gene name', unique=True, max_length=30)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', on_delete=models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fill the names and the organism from the data returned by
        get_uniprot_info(), creating the Taxonomy entry if it does not exist

        the fetched data is kept on the instance so that
        autofill_post_save() can set the m2m relations after the save
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']
        self.entry_name = info['entry_name']
        self.short_name = info['short_name']

        # use the first gene name flagged as primary, and fall back to the
        # first gene name of the list when none is
        gene_names = info['gene_names']
        self.gene_name = next(
            (entry["name"] for entry in gene_names if entry["type"] == "primary"),
            gene_names[0]['name'],
        )

        organism_id = info['organism']
        try:
            self.organism = Taxonomy.objects.get(taxonomy_id=organism_id)
        except Taxonomy.DoesNotExist:
            new_taxonomy = Taxonomy(taxonomy_id=organism_id)
            new_taxonomy.save(autofill=True)
            self.organism = new_taxonomy
        self.__info = info

    def autofill_post_save(self):
        """
        link the protein to its molecular functions and domains, creating
        the missing ones, from the data fetched by autofill()
        """
        fetched = self.__info
        for go_id in fetched['molecular_functions']:
            self.molecular_functions.add(
                MolecularFunction.objects.get_or_create(go_id=go_id)[0])
        for pfam_acc in fetched['domains']:
            self.domains.add(
                Domain.objects.get_or_create(pfam_acc=pfam_acc)[0])

    def is_autofill_done(self):
        """
        the protein is considered filled once its gene name is not empty
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{uniprot_id} ({name})'.format(
            uniprot_id=self.uniprot_id,
            name=self.recommended_name_long,
        )

Hervé  MENAGER's avatar
Hervé MENAGER committed
275

276
class Domain(AutoFillableModel):
    """
    Protein domain identified by its Pfam accession
    """
    pfam_acc = models.CharField(verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(verbose_name='Pfam Description', max_length=100)
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    # TODO: what is this field? check database
    # contents

    def autofill(self):
        """
        set the Pfam identifier and description from the 'id' and
        'description' entries returned by get_pfam_info() for the accession
        """
        pfam_info = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_info['id']
        self.pfam_description = pfam_info['description']

    def is_autofill_done(self):
        """
        the domain is considered filled once its Pfam identifier is set and
        not empty
        """
        if self.pfam_id is None:
            return False
        return len(self.pfam_id) > 0

    @property
    def name(self):
        """
        the Pfam family identifier
        """
        return self.pfam_id

    def __str__(self):
        return '{acc} ({family}-{description})'.format(
            acc=self.pfam_acc,
            family=self.pfam_id,
            description=self.pfam_description,
        )

Hervé  MENAGER's avatar
Hervé MENAGER committed
307

308
class ProteinDomainComplex(models.Model):
    """
    Association of a protein with one of its domains (the domain is
    optional), with the number of copies of the protein in the complex
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE, null=True, blank=True)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        return '{protein}-{domain}'.format(
            protein=self.protein_id,
            domain=self.domain_id,
        )

    def name(self):
        """
        return the short name of the protein of the complex
        """
        return self.protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
325

326

327
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
337
338


339
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "partner complex" role;
    it adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
346

Hervé  MENAGER's avatar
Hervé MENAGER committed
347

348
class Symmetry(models.Model):
    """
    Symmetry of a PPI, as a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{code} ({description})'.format(
            code=self.code,
            description=self.description,
        )

361
362

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    # is there any database/nomenclature for diseases?

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
370

Hervé  MENAGER's avatar
Hervé MENAGER committed
371
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
382

Hervé  MENAGER's avatar
Hervé MENAGER committed
383

384
class Ppi(AutoFillableModel):
    """
    PPI

    The name field is computed by autofill() from the short names of the
    proteins of the complexes of the PPI.
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease, blank=True)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def get_absolute_url(self):
        """
        return the URL of the 'ppi-view' page of this PPI
        """
        return reverse('ppi-view', kwargs={'ppi_pk': self.pk})

    def is_autofill_done(self):
        """
        the PPI is considered filled unless its name is the empty string
        """
        # NOTE(review): a None name (the field is nullable) also counts as
        # "done" here, so the name is then only computed when save() is
        # called with autofill=True - confirm this is intended
        return self.name != ""

    def autofill(self):
        """
        compute the name of the PPI from its protein names
        """
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        build the name of the PPI from the short names of its proteins

        :return: the comma separated names of the proteins of the bound
            complexes, followed by ' / ' and the comma separated names of the
            other proteins when there are some; each group is sorted so that
            the same set of proteins always gives the same name
        """
        # select_related fetches the complex and its protein together with
        # each PpiComplex, instead of two extra queries per complex
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        partner_protein_names = all_protein_names - bound_protein_names
        # sets have no defined iteration order: sort before joining
        name = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        if partner_str:
            name += ' / ' + partner_str
        return name
427

Hervé  MENAGER's avatar
Hervé MENAGER committed
428

Hervé  MENAGER's avatar
Hervé MENAGER committed
429
class PpiComplex(models.Model):
    """
    Link between a PPI and one of its Protein-Domain complexes
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI {ppi}, Complex {complex} ({cc_nb})'.format(
            ppi=self.ppi,
            complex=self.complex,
            cc_nb=self.cc_nb,
        )

Hervé  MENAGER's avatar
Hervé MENAGER committed
446

447
def _boolean_flag(*condition, **lookups):
    """
    build a boolean annotation which is True when the given condition
    (a Q object or field lookups) holds, and False otherwise
    """
    return Case(
        When(*condition, then=True, **lookups),
        default=False,
        output_field=BooleanField(),
    )


def _any_match_flag(**lookups):
    """
    build a boolean annotation from the maximum, over the joined rows, of
    1 for the rows matching the lookups and 0 for the others
    """
    matches = Case(When(then=1, **lookups), default=0, output_field=IntegerField())
    return Cast(Max(matches), BooleanField())


def _as_integer(annotation_name):
    """
    build the cast to an integer of the given annotation or field
    """
    return Cast(F(annotation_name), IntegerField())


class CompoundManager(models.Manager):
    """
    Manager which annotates every compound returned by the database query
    with a number of computed values, used for filters and compound card
    """

    def get_queryset(self):
        """
        return the default queryset, with all the annotations added

        the annotations are added one by one because some of them refer to
        annotations added before them
        """
        # @formatter:off
        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # with LE
        qs = qs.annotate(
            le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # with LLE
        qs = qs.annotate(
            lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=_boolean_flag(molecular_weight__lte=500))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=_boolean_flag(nb_acceptor_h__lte=10))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=_boolean_flag(nb_donor_h__lte=5))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=_boolean_flag(a_log_p__lte=5))
        # Lipinsky global (at least 3 of the 4 criteria above)
        qs = qs.annotate(
            lipinsky_score=_as_integer('lipinsky_mw') + _as_integer('lipinsky_hba')
            + _as_integer('lipinsky_hbd') + _as_integer('lipinsky_a_log_p'))
        qs = qs.annotate(lipinsky=_boolean_flag(lipinsky_score__gte=3))
        # Veber hba_hbd (<=12)
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=_boolean_flag(hba_hbd__lte=12))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=_boolean_flag(tpsa__lte=140))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=_boolean_flag(nb_rotatable_bonds__lte=10))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa))
        #qs = qs.annotate(veber=F('veber_rb').bitand(F('veber_hba_hbd').bitor(F('veber_tpsa'))))
        qs = qs.annotate(
            veber=_boolean_flag(
                Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140)))))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=_boolean_flag(a_log_p__lte=3))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=_boolean_flag(tpsa__gte=75))
        # Pfizer global (AlogP and TPSA)
        #qs = qs.annotate(pfizer=F('pfizer_a_log_p').bitand(F('pfizer_tpsa')))
        qs = qs.annotate(
            pfizer=_boolean_flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75))))
        # PDB ligand available
        qs = qs.annotate(
            pdb_ligand_av=_any_match_flag(compoundaction__ligand_id__isnull=False))
        # inhibition role
        qs = qs.annotate(
            inhibition_role=_boolean_flag(compoundactivityresult__modulation_type='I'))
        # binding role
        qs = qs.annotate(
            binding_role=_boolean_flag(compoundactivityresult__modulation_type='B'))
        # stabilisation role
        qs = qs.annotate(
            stabilisation_role=_boolean_flag(compoundactivityresult__modulation_type='S'))
        # cellular tests performed
        qs = qs.annotate(
            celltest_av=_any_match_flag(
                compoundactivityresult__test_activity_description__test_type='CELL'))
        # inhibition tests performed
        qs = qs.annotate(
            inhitest_av=_any_match_flag(
                compoundactivityresult__test_activity_description__test_modulation_type='I'))
        # stabilisation tests performed
        qs = qs.annotate(
            stabtest_av=_any_match_flag(
                compoundactivityresult__test_activity_description__test_modulation_type='S'))
        # binding tests performed
        qs = qs.annotate(
            bindtest_av=_any_match_flag(
                compoundactivityresult__test_activity_description__test_modulation_type='B'))
        # pharmacokinetic tests performed
        qs = qs.annotate(
            pktest_av=_any_match_flag(refcompoundbiblio__bibliography__pharmacokinetic=True))
        # cytotoxicity tests performed
        qs = qs.annotate(
            cytoxtest_av=_any_match_flag(refcompoundbiblio__bibliography__cytotox=True))
        # in silico study performed
        qs = qs.annotate(
            insilico_av=_any_match_flag(refcompoundbiblio__bibliography__in_silico=True))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
520

521
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
522
523
524
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
525
    objects = CompoundManager()
526
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
527
        verbose_name='Canonical Smiles',
528
529
        unique=True,
    )
530
531
532
533
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
534
    aromatic_ratio = models.DecimalField(
535
536
537
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
538
539
        blank=True,
        null=True,
540
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
541
    balaban_index = models.DecimalField(
542
543
544
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
545
546
        blank=True,
        null=True,
547
548
549
550
551
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
552
553
        blank=True,
        null=True,
554
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
555
    gc_molar_refractivity = models.DecimalField(
556
557
558
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
559
560
        blank=True,
        null=True,
561
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
562
    log_d = models.DecimalField(
563
564
565
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
566
567
        blank=True,
        null=True,
568
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
569
    a_log_p = models.DecimalField(
570
571
572
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
573
574
        blank=True,
        null=True,
575
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
576
    mean_atom_vol_vdw = models.DecimalField(
577
578
579
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
580
581
        blank=True,
        null=True,
582
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
583
    molecular_weight = models.DecimalField(
584
585
586
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
587
588
        blank=True,
        null=True,
589
590
591
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
592
593
        blank=True,
        null=True,
594
595
596
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
597
598
        blank=True,
        null=True,
599
600
601
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
602
603
        blank=True,
        null=True,
604
605
606
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
607
608
        blank=True,
        null=True,
609
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
610
    nb_aromatic_sssr = models.IntegerField(
611
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
612
613
        blank=True,
        null=True,
614
615
616
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
617
618
        blank=True,
        null=True,
619
620
621
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
622
623
        blank=True,
        null=True,
624
625
626
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
627
628
        blank=True,
        null=True,
629
630
631
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
632
633
        blank=True,
        null=True,
634
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
635
    nb_bonds_non_h = models.IntegerField(
636
        verbose_name='Number of bonds not involving a hydrogen',
637
638
        blank=True,
        null=True,
639
640
641
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
642
643
        blank=True,
        null=True,
644
645
646
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
647
648
        blank=True,
        null=True,
649
650
651
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
652
653
        blank=True,
        null=True,
654
655
656
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
657
658
        blank=True,
        null=True,
659
660
661
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
662
663
        blank=True,
        null=True,
664
665
666
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
667
668
        blank=True,
        null=True,
669
670
671
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
672
673
        blank=True,
        null=True,
674
675
676
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
677
678
        blank=True,
        null=True,
679
680
681
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
682
683
        blank=True,
        null=True,
684
685
686
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
687
688
        blank=True,
        null=True,
689
690
691
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
692
693
        blank=True,
        null=True,
694
695
696
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
697
698
        blank=True,
        null=True,
699
700
701
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
702
703
        blank=True,
        null=True,
704
705
706
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
707
708
        blank=True,
        null=True,
709
710
711
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
712
713
        blank=True,
        null=True,
714
715
716
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
717
718
        blank=True,
        null=True,
719
720
721
    )
    inchi = models.TextField(
        verbose_name='InChi',
722
723
        blank=True,
        null=True,
724
725
726
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
727
728
        blank=True,
        null=True,
729
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
730
    randic_index = models.DecimalField(
731
732
733
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
734
735
        blank=True,
        null=True,
736
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
737
    rdf070m = models.DecimalField(
738
739
740
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
741
742
        blank=True,
        null=True,
743
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
744
    rotatable_bond_fraction = models.DecimalField(
745
746
747
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
748
749
        blank=True,
        null=True,
750
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
751
    sum_atom_polar = models.DecimalField(
752
753
754
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
755
756
        blank=True,
        null=True,
757
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
758
    sum_atom_vol_vdw = models.DecimalField(
759
760
761
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
762
763
        blank=True,
        null=True,
764
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
765
    tpsa = models.DecimalField(
766
767
768
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
769
770
        blank=True,
        null=True,
771
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
772
    ui = models.DecimalField(
773
774
775
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
776
777
        blank=True,
        null=True,
778
779
780
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
781
782
        blank=True,
        null=True,
783
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
784
    common_name = models.CharField(
785
786
787
788
789
        verbose_name='Common name',
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
790
    pubchem_id = models.CharField(
791
792
793
794
795
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
796
    chemspider_id = models.CharField(
797
798
799
800
801
802
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
803
    chembl_id = models.CharField(
804
805
806
807
808
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
809
    iupac_name = models.CharField(
810
811
812
813
814
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
815

816
    class Meta:
        # default ordering applied to Compound querysets: ascending id
        ordering = ['id']
818

819
820
821
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved first, then its canonical SMILES is compared to
        the canonical SMILES of every DrugBankCompound, using the fingerprint
        type named by settings.DRUGBANK_FINGERPRINTS ("FP2" when the setting
        is absent). The 15 best scoring DrugBank compounds are stored as
        DrugbankCompoundTanimoto entries, created with a single bulk insert.
        """
        # the compound is saved before the entries referencing it are created
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # load the DrugBank compounds once and keep them indexed by id, so the
        # retained matches can be linked below without one extra query each
        drugbank_compounds = {c.id: c for c in DrugBankCompound.objects.all()}
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {id_: c.canonical_smiles for id_, c in drugbank_compounds.items()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 highest scores, best first
        # NOTE(review): relies on tanimoto_smiles returning the ids it was given
        # as keys, as the previous lookup by id did - confirm
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store the retained similarities
        # NOTE(review): entries created by a previous call are not removed
        # here - confirm this method is only run once per compound
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound=drugbank_compounds[id_],
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
837
838
839
    @property
    def biblio_refs(self):
        """
        return the RefCompoundBiblio entries whose compound is this compound
        """
        refs = RefCompoundBiblio.objects.filter(compound=self)
        return refs
843

844
845
846
847
848
849
850
851
852
853
854
855
856
    @property
    def pfam_ids(self):
        """
        return the set of PFAM ids of the domains of the bound complexes
        of the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # result is not used; the call is kept as is
            # NOTE(review): looks redundant with the loop below - confirm
            # get_complexes() has no side effect before removing it
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

857
    @property
Hervé  MENAGER's avatar
Hervé MENAGER committed
858
    def compound_action_ligand_ids(self):
        """
        return the set of distinct ligand_id values of the compound actions
        of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
866

867
868
    @property
    def best_pXC50_activity(self):
        """
        return the highest activity value among the compound activity
        results of this compound, None if there is no value to aggregate
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
870
871
872
873
874
875

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return a compound activity result of this compound whose activity
        equals the best activity, None if there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            # several results may share the best activity, the first one
            # returned by the query is used
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
877

878
    @property
879
    def best_pXC50_activity_ppi_name(self):
        """
        return the name of the PPI of the test activity description of the
        best activity result, None if there is no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        return None if top_result is None else top_result.test_activity_description.ppi.name

889
890
891
892
893
894
895
896
897
898
899
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        return the name of the family of the PPI of the test activity
        description of the best activity result, None if there is no such
        result
        """
        top_result = self.best_pXC50_compound_activity_result
        return None if top_result is None else top_result.test_activity_description.ppi.family.name

900
901
902
903
904
905
906
907
908
909
910
911
912
913
    @property
    def bioch_tests_count(self):
        """
        return the number of compound activity results of this compound
        whose test activity description has the 'BIOCH' test type
        """
        biochemical_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='BIOCH',
        )
        return biochemical_results.count()

    @property
    def cell_tests_count(self):
        """
        return the number of compound activity results of this compound
        whose test activity description has the 'CELL' test type
        """
        cellular_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='CELL',
        )
        return cellular_results.count()

914
915
916
917
918
919
    @property
    def families(self):
        """
        return the list of distinct PPI families of the PPIs of the
        compound actions of this compound
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
920

921
922
923
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        return the DrugBank similarity entries of this compound, ordered by
        decreasing tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
924

925
926
927
    def autofill(self):
        # compute InChi and InChiKey
        self.inchi = smi2inchi(self.canonical_smile)
928
        self.inchikey = smi2inchikey(self.canonical_smile)
929
        self.compute_drugbank_compound_similarity()
930