models.py 40.8 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.core.exceptions import ValidationError
12
from django.db import models, transaction
13
14
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
19
20
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
21

Hervé  MENAGER's avatar
Hervé MENAGER committed
22

23
24
25
26
27
28
29
30
31
32
33
34
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be filled from an external source.

    Subclasses implement autofill() to fetch and assign the field values.
    save() accepts an extra ``autofill`` keyword argument: when it is True,
    or when is_autofill_done() returns False, autofill() is called before
    the instance is written to the database.
    """

    class Meta:
        abstract = True

    def is_autofill_done(self):
        """
        Tell save() whether the autofilled fields are already populated.

        The base implementation always answers True, so autofill() only runs
        when save(autofill=True) is requested explicitly.
        """
        return True

    def autofill(self):
        """
        Fill the model fields from the external source.

        Must be overridden by subclasses; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def save(self, *args, **kwargs):
        """
        Optionally run autofill(), then save the instance.

        The ``autofill`` keyword is consumed here and never forwarded to
        Django's Model.save(), which does not know about it.
        """
        forced = kwargs.get('autofill') is True
        if forced or not self.is_autofill_done():
            self.autofill()
        kwargs.pop('autofill', None)
        super().save(*args, **kwargs)

47
48

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)

    The title, journal name, authors list and year are filled by autofill()
    from the web service matching the ``source`` of the reference.
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI'),
    )
    # one compiled pattern per source code, used to validate id_source
    id_source_validators = dict(
        PM=re.compile("^[0-9]+$"),
        PT=re.compile("^.*$"),
        # raw string, with the dot after "10" escaped so that it only
        # matches a literal "."
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    class Meta:
        verbose_name_plural = "bibliographies"

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI)

        Raises NotImplementedError when ``source`` is none of the known codes.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled as soon as it has a title.
        """
        return len(self.title) > 0

    def clean(self):
        """
        Run the default model validation, then check that id_source has the
        format expected for the selected source.
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Only PubMed references have an external URL.
        """
        return self.source == 'PM'

    def get_external_url(self):
        """
        Return the PubMed URL of the reference, or None for other sources.
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the identifier is valid. Raises ValidationError
        (keyed on the offending field) when the source is unknown or when
        the identifier does not match the pattern of the source.
        """
        id_source_validator = Bibliography.id_source_validators.get(source)
        if id_source_validator is None:
            # report an unknown source as a validation problem rather than
            # letting a KeyError escape from clean()
            raise ValidationError(
                dict(
                    source=_("Unknown bibliographic source")
                )
            )
        if not id_source_validator.match(id_source):
            # translate the message template first, interpolate afterwards:
            # an already-interpolated string cannot be found in the catalog
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
127

Hervé  MENAGER's avatar
Hervé MENAGER committed
128

129
class Taxonomy(AutoFillableModel):
    """
    Taxonomy IDs (from NCBI Taxonomy)
    and the corresponding human-readable name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        fetch information from external services
        (NCBI Entrez)

        The organism name is read from the 'scientific_name' entry returned
        by get_taxonomy_info().
        """
        taxonomy_info = get_taxonomy_info(self.taxonomy_id)
        self.name = taxonomy_info['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
151

Hervé  MENAGER's avatar
Hervé MENAGER committed
152

153
class MolecularFunction(AutoFillableModel):
    """
    Molecular functions (from Gene Ontology)
    and the corresponding human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """
        GO identifier and description separated by a single space.
        """
        return ' '.join((self.go_id, self.description))

    def autofill(self):
        """
        fetch information from external services
        (EBI OLS)

        The description is read from the 'label' entry returned by
        get_go_info().
        """
        go_info = get_go_info(self.go_id)
        self.description = go_info['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
177

178
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name

    autofill() also links the protein to its organism (Taxonomy), its
    molecular functions and its domains, creating the records that do not
    exist yet.
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions/Domains if needed

        Runs in a single transaction, so records created along the way are
        rolled back if a later step fails.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']
        self.gene_name = info['gene']
        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # the protein must be saved before many-to-many links can be added.
        # NOTE(review): this goes through AutoFillableModel.save(), which
        # calls autofill() again if gene_name is still empty at this point -
        # confirm that get_uniprot_info() always provides a gene name.
        super(Protein, self).save()

        for go_id in info['molecular_functions']:
            try:
                mol_function = MolecularFunction.objects.get(go_id=go_id)
            except MolecularFunction.DoesNotExist:
                mol_function = MolecularFunction()
                mol_function.go_id = go_id
                mol_function.save(autofill=True)
            self.molecular_functions.add(mol_function)

        for domain_id in info['domains']:
            try:
                domain = Domain.objects.get(pfam_acc=domain_id)
            except Domain.DoesNotExist:
                # a missing domain raises Domain.DoesNotExist, so that is the
                # exception to catch in order to create it
                domain = Domain()
                domain.pfam_acc = domain_id
                domain.save(autofill=True)
            self.domains.add(domain)

    def is_autofill_done(self):
        """
        The protein is considered filled as soon as it has a gene name.
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
237

238
class Domain(AutoFillableModel):
    """
    Domain (i.e. Protein domain) information (from PFAM)
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        verbose_name='Pfam Description',
        max_length=100,
    )
    # TODO: what is this field? check database
    # contents
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    def __str__(self):
        return '{0} ({1}-{2})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)

    @property
    def name(self):
        """
        The Pfam family identifier is used as the name of the domain.
        """
        return self.pfam_id

    def autofill(self):
        """
        fetch information from external services
        (PFAM)

        The identifier and description are read from the 'id' and
        'description' entries returned by get_pfam_info().
        """
        pfam_info = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_info['id']
        self.pfam_description = pfam_info['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
266

267
class ProteinDomainComplex(models.Model):
    """
    Protein-Domain association
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        """
        Short name of the protein of the complex.

        This is a plain method, not a property: Python callers must call it.
        """
        protein = self.protein
        return protein.short_name

    def __str__(self):
        # built from the raw foreign key values, no extra query is needed
        return '{0}-{1}'.format(self.protein_id, self.domain_id)
Hervé  MENAGER's avatar
Hervé MENAGER committed
284

285

286
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association with a "bound complex" role

    Extends ProteinDomainComplex with the ppp_copy_nb_per_p count.
    """
    # the verbose name is a translation key resolved lazily
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name=_('ppp_copy_nb_per_p'),
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
296
297


298
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association with a "partner complex" role

    Adds no field of its own to ProteinDomainComplex.
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
305

Hervé  MENAGER's avatar
Hervé MENAGER committed
306

307
class Symmetry(models.Model):
    """
    Symmetry of a PPI, as a two-character code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{0} ({1})'.format(self.code, self.description)

320
321

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
329

Hervé  MENAGER's avatar
Hervé MENAGER committed
330
class PpiFamily(models.Model):
    """
    PPI Family, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
341

Hervé  MENAGER's avatar
Hervé MENAGER committed
342

343
class Ppi(AutoFillableModel):
    """
    PPI

    The name is computed from the short names of the proteins of the
    complexes attached to the PPI, see compute_name_from_protein_names().
    """
    pdb_id = models.CharField(
        verbose_name='PDB ID',
        max_length=4,
        null=True,
        blank=True,
    )
    pockets_nb = models.IntegerField(
        verbose_name='Total number of pockets in the complex',
        default=1,
    )
    symmetry = models.ForeignKey(Symmetry, on_delete=models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(
        PpiFamily,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    name = models.TextField(verbose_name='PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{0} on {1}'.format(self.id, self.name)

    def is_autofill_done(self):
        # NOTE(review): this reports "done" when the name is an empty string,
        # so save() only recomputes the name when it is None or already set -
        # looks inverted compared to the other models, confirm the intent.
        return self.name == ""

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        bound_complexes = ProteinDomainBoundComplex.objects.all()
        return PpiComplex.objects.filter(ppi=self, complex__in=bound_complexes)

    def compute_name_from_protein_names(self):
        """
        Build the name as '<bound proteins> / <partner proteins>'.

        Each side is a comma-separated list of distinct protein short names;
        partners are the proteins of the PPI that are in no bound complex.
        The ' / <partners>' part is left out when there is no partner.
        """
        every_name = {
            link.complex.protein.short_name
            for link in self.ppicomplex_set.all()
        }
        bound_names = {
            link.complex.protein.short_name
            for link in self.get_ppi_bound_complexes()
        }
        partner_names = every_name - bound_names
        label = ','.join(bound_names)
        partner_label = ','.join(partner_names)
        if partner_label:
            label = label + ' / ' + partner_label
        return label
383

Hervé  MENAGER's avatar
Hervé MENAGER committed
384

Hervé  MENAGER's avatar
Hervé MENAGER committed
385
class PpiComplex(models.Model):
    """
    PPI Complex: links a Ppi to one of its protein-domain complexes
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    # the verbose name is a translation key resolved lazily
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI {0}, Complex {1} ({2})'.format(self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
402

403
class CompoundManager(models.Manager):
    """
    CompoundManager adds automatically a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        Return the default queryset with every annotation applied.

        The annotations are added one by one, in order, because some of them
        refer to annotations added before them (e.g. lipinsky_score reads the
        four lipinsky_* flags, veber reads hba_hbd).
        """
        # @formatter:off
        def flag(*conditions, **lookups):
            # boolean annotation: True when the condition holds, else False
            return Case(When(*conditions, then=True, **lookups), default=False, output_field=BooleanField())

        def exists(**lookups):
            # boolean annotation: True when at least one related row matches
            matched = Case(When(then=1, **lookups), default=0, output_field=IntegerField())
            return Cast(Max(matched), BooleanField())

        def as_int(annotation_name):
            # a previously added annotation, cast to an integer
            return Cast(F(annotation_name), IntegerField())

        def top_activity():
            # highest activity among the activity results of the compound
            return Max('compoundactivityresult__activity')

        annotations = (
            # with number of publications
            ('pubs', Count('refcompoundbiblio', distinct=True)),
            # with best activity
            ('best_activity', top_activity()),
            # with LE
            ('le', Cast(1.37 * top_activity() / F('nb_atom_non_h'), FloatField())),
            # with LLE
            ('lle', Cast(top_activity() - F('a_log_p'), FloatField())),
            # Lipinsky MW (<=500)
            ('lipinsky_mw', flag(molecular_weight__lte=500)),
            # Lipinsky hba (<=10)
            ('lipinsky_hba', flag(nb_acceptor_h__lte=10)),
            # Lipinsky hbd (<=5)
            ('lipinsky_hbd', flag(nb_donor_h__lte=5)),
            # Lipinsky a_log_p (<=5)
            ('lipinsky_a_log_p', flag(a_log_p__lte=5)),
            # Lipinsky global (at least 3 of the 4 criteria above)
            ('lipinsky_score', as_int('lipinsky_mw') + as_int('lipinsky_hba') + as_int('lipinsky_hbd') + as_int('lipinsky_a_log_p')),
            ('lipinsky', flag(lipinsky_score__gte=3)),
            # Veber hba_hbd (<=12)
            ('hba_hbd', F('nb_acceptor_h') + F('nb_donor_h')),
            ('veber_hba_hbd', flag(hba_hbd__lte=12)),
            # Veber TPSA (<=140)
            ('veber_tpsa', flag(tpsa__lte=140)),
            # Veber Rotatable Bonds (<=10)
            ('veber_rb', flag(nb_rotatable_bonds__lte=10)),
            # Veber global (Rotatable bonds and (hba_hbd or tpsa))
            ('veber', flag(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))))),
            # Pfizer AlogP (<=3)
            ('pfizer_a_log_p', flag(a_log_p__lte=3)),
            # Pfizer TPSA (>=75)
            ('pfizer_tpsa', flag(tpsa__gte=75)),
            # Pfizer global (AlogP and TPSA)
            ('pfizer', flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))),
            # PDB ligand available
            ('pdb_ligand_av', exists(compoundaction__ligand_id__isnull=False)),
            # NOTE(review): unlike the *_av annotations, the three role flags
            # below are not aggregated over the activity results - confirm
            # this cannot yield several rows for one compound.
            # inhibition role
            ('inhibition_role', flag(compoundactivityresult__modulation_type='I')),
            # binding role
            ('binding_role', flag(compoundactivityresult__modulation_type='B')),
            # stabilisation role
            ('stabilisation_role', flag(compoundactivityresult__modulation_type='S')),
            # cellular tests performed
            ('celltest_av', exists(compoundactivityresult__test_activity_description__test_type='CELL')),
            # inhibition tests performed
            ('inhitest_av', exists(compoundactivityresult__test_activity_description__test_modulation_type='I')),
            # stabilisation tests performed
            ('stabtest_av', exists(compoundactivityresult__test_activity_description__test_modulation_type='S')),
            # binding tests performed
            ('bindtest_av', exists(compoundactivityresult__test_activity_description__test_modulation_type='B')),
            # pharmacokinetic tests performed
            ('pktest_av', exists(refcompoundbiblio__bibliography__pharmacokinetic=True)),
            # cytotoxicity tests performed
            ('cytoxtest_av', exists(refcompoundbiblio__bibliography__cytotox=True)),
            # in silico study performed
            ('insilico_av', exists(refcompoundbiblio__bibliography__in_silico=True)),
            # number of tests available
            ('tests_av', Count('compoundactivityresult', distinct=True)),
        )
        #@formatter:on
        qs = super().get_queryset()
        for alias, expression in annotations:
            qs = qs.annotate(**{alias: expression})
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
476

477
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
478
479
480
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
481
    objects = CompoundManager()
482
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
483
        verbose_name='Canonical Smiles',
484
485
        unique=True,
    )
486
487
488
489
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
490
    aromatic_ratio = models.DecimalField(
491
492
493
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
494
495
        blank=True,
        null=True,
496
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
497
    balaban_index = models.DecimalField(
498
499
500
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
501
502
        blank=True,
        null=True,
503
504
505
506
507
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
508
509
        blank=True,
        null=True,
510
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
511
    gc_molar_refractivity = models.DecimalField(
512
513
514
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
515
516
        blank=True,
        null=True,
517
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
518
    log_d = models.DecimalField(
519
520
521
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
522
523
        blank=True,
        null=True,
524
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
525
    a_log_p = models.DecimalField(
526
527
528
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
529
530
        blank=True,
        null=True,
531
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
532
    mean_atom_vol_vdw = models.DecimalField(
533
534
535
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
536
537
        blank=True,
        null=True,
538
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
539
    molecular_weight = models.DecimalField(
540
541
542
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
543
544
        blank=True,
        null=True,
545
546
547
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
548
549
        blank=True,
        null=True,
550
551
552
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
553
554
        blank=True,
        null=True,
555
556
557
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
558
559
        blank=True,
        null=True,
560
561
562
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
563
564
        blank=True,
        null=True,
565
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
566
    nb_aromatic_sssr = models.IntegerField(
567
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
568
569
        blank=True,
        null=True,
570
571
572
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
573
574
        blank=True,
        null=True,
575
576
577
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
578
579
        blank=True,
        null=True,
580
581
582
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
583
584
        blank=True,
        null=True,
585
586
587
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
588
589
        blank=True,
        null=True,
590
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
591
    nb_bonds_non_h = models.IntegerField(
592
        verbose_name='Number of bonds not involving a hydrogen',
593
594
        blank=True,
        null=True,
595
596
597
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
598
599
        blank=True,
        null=True,
600
601
602
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
603
604
        blank=True,
        null=True,
605
606
607
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
608
609
        blank=True,
        null=True,
610
611
612
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
613
614
        blank=True,
        null=True,
615
616
617
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
618
619
        blank=True,
        null=True,
620
621
622
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
623
624
        blank=True,
        null=True,
625
626
627
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
628
629
        blank=True,
        null=True,
630
631
632
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
633
634
        blank=True,
        null=True,
635
636
637
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
638
639
        blank=True,
        null=True,
640
641
642
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
643
644
        blank=True,
        null=True,
645
646
647
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
648
649
        blank=True,
        null=True,
650
651
652
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
653
654
        blank=True,
        null=True,
655
656
657
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
658
659
        blank=True,
        null=True,
660
661
662
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
663
664
        blank=True,
        null=True,
665
666
667
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
668
669
        blank=True,
        null=True,
670
671
672
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
673
674
        blank=True,
        null=True,
675
676
677
    )
    inchi = models.TextField(
        verbose_name='InChi',
678
679
        blank=True,
        null=True,
680
681
682
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
683
684
        blank=True,
        null=True,
685
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
686
    randic_index = models.DecimalField(
687
688
689
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
690
691
        blank=True,
        null=True,
692
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
693
    rdf070m = models.DecimalField(
694
695
696
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
697
698
        blank=True,
        null=True,
699
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
700
    rotatable_bond_fraction = models.DecimalField(
701
702
703
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
704
705
        blank=True,
        null=True,
706
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
707
    sum_atom_polar = models.DecimalField(
708
709
710
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
711
712
        blank=True,
        null=True,
713
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
714
    sum_atom_vol_vdw = models.DecimalField(
715
716
717
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
718
719
        blank=True,
        null=True,
720
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
721
    tpsa = models.DecimalField(
722
723
724
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
725
726
        blank=True,
        null=True,
727
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
728
    ui = models.DecimalField(
729
730
731
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
732
733
        blank=True,
        null=True,
734
735
736
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
737
738
        blank=True,
        null=True,
739
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
740
    common_name = models.CharField(
741
742
743
744
745
746
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
747
    pubchem_id = models.CharField(
748
749
750
751
752
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
753
    chemspider_id = models.CharField(
754
755
756
757
758
759
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
760
    chembl_id = models.CharField(
761
762
763
764
765
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
766
    iupac_name = models.CharField(
767
768
769
770
771
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
772

773
    class Meta:
        # default ordering of the querysets of this model: ascending id
        ordering = ['id']
775

776
777
778
    def compute_drugbank_compound_similarity(self):
        """
        Compute the Tanimoto similarity of this compound to the DrugBank compounds

        The compound is saved first, then its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound. The 15 highest similarities
        are stored as DrugbankCompoundTanimoto rows with a single bulk insert.
        """
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all DrugBank compounds
        # only the two columns that are needed are fetched, no model instance is built
        smiles_dict = dict(DrugBankCompound.objects.values_list('id', 'canonical_smiles'))
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best matches, highest tanimoto first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. insert the results; the DrugBank compound is referenced through its id,
        # which avoids one SELECT per match just to build the foreign key
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
794
795
796
    @property
    def biblio_refs(self):
        """
        queryset of the RefCompoundBiblio entries whose compound is this one
        """
        refs = RefCompoundBiblio.objects.filter(compound=self)
        return refs
800

801
802
803
804
805
806
807
808
809
810
811
812
813
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the complexes bound in the PPIs
        this compound has an action on
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # the value returned by get_complexes() is not used here
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

814
    @property
    def compound_action_ligand_ids(self):
        """
        set of the ligand_id values of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
823

824
825
    @property
    def best_pXC50_activity(self):
        """
        maximum of the activity of the compound activity results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
827
828
829
830
831
832

    @property
    def best_pXC50_compound_activity_result(self):
        """
        first compound activity result whose activity equals the best pXC50
        activity, None when there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
834

835
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI corresponding to the best PXC50 activity,
        None when there is no best compound activity result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.name

846
847
848
849
850
851
852
853
854
855
856
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI corresponding to the best PXC50 activity,
        None when there is no best compound activity result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.family.name

857
858
859
860
861
862
863
864
865
866
867
868
869
870
    @property
    def bioch_tests_count(self):
        """
        number of compound activity results whose test activity description
        has the 'BIOCH' test type
        """
        biochemical_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='BIOCH',
        )
        return biochemical_results.count()

    @property
    def cell_tests_count(self):
        """
        number of compound activity results whose test activity description
        has the 'CELL' test type
        """
        cellular_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='CELL',
        )
        return cellular_results.count()

871
872
873
874
875
876
    @property
    def families(self):
        """
        list of the distinct PPI families of the PPIs targeted by the compound
        actions of this compound, in no particular order
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
877

878
879
880
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        related DrugbankCompoundTanimoto entries, highest tanimoto first
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
881

882
883
884
    def autofill(self):
        """
        fill inchi and inchikey from the canonical SMILES of the compound, then
        compute its similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        # InChi first, then InChiKey, both derived from the same SMILES
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        self.compute_drugbank_compound_similarity()
887

888
889
890
    def __str__(self):
        """
        text representation of the compound, built from its id
        """
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
891

892
893
894
class CompoundTanimoto(models.Model):
    """
    Tanimoto value between a query SMILES and a compound, for a given fingerprint
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # a single row per query SMILES, fingerprint and compound
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
903

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
904

905
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is computed when CompoundTanimoto already contains rows for this
    SMILES and this fingerprint.

    smiles_query is the SMILES compared to the canonical SMILES of every
    compound, fingerprint is the value handed to FingerPrinter and stored
    with each result.
    """
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query, fingerprint=fingerprint).exists()
    if already_computed:
        return
    # only the id and the SMILES of the compounds are needed, so the other
    # columns are not loaded and no model instance is built
    smiles_dict = dict(Compound.objects.values_list('id', 'canonical_smile'))
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    # the compound is referenced through its id, which avoids one SELECT per compound
    cts = [
        CompoundTanimoto(canonical_smiles=smiles_query, fingerprint=fingerprint,
                         compound_id=id_, tanimoto=tanimoto_dict[id_])
        for id_ in smiles_dict
    ]
    CompoundTanimoto.objects.bulk_create(cts)
921

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
922

923
class PcaBiplotData(models.Model):
Hervé  MENAGER's avatar
Hervé MENAGER committed
924
925
926
927
    """
    PCA biplot data
    the table contains all the data as one JSON text in one row
    """
928
929
    pca_biplot_data = models.TextField(
        'PCA biplot JSON data', blank=True