models.py 41 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.core.exceptions import ValidationError
12
from django.db import models, transaction
13
14
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
19
20
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
21

Hervé  MENAGER's avatar
Hervé MENAGER committed
22

23
24
25
26
27
28
29
30
31
32
33
34
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be populated from an
    external source by the autofill() method.

    save() accepts an extra ``autofill`` keyword argument: when it is True,
    or when is_autofill_done() reports that the instance is not filled yet,
    autofill() is called before the instance is saved.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        # 'autofill' is our own flag: strip it so it never reaches Model.save()
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super().save(*args, **kwargs)

    def autofill(self):
        # subclasses provide the actual lookup
        raise NotImplementedError()

    def is_autofill_done(self):
        # by default nothing is considered missing
        return True

47
48

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # one compiled pattern per SOURCES key, used to validate id_source
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # raw string so that \d is a regex escape, and the dot following
        # the "10" prefix is escaped so it only matches a literal "."
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI, depending on the source)

        Raises NotImplementedError when the source is none of
        'PM', 'PT' or 'DO'.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled once it has a title
        (an unset/None title counts as not filled)
        """
        return bool(self.title)

    def clean(self):
        """
        Model-level validation: id_source must match the pattern
        associated with the selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Tell whether get_external_url() returns a URL (PubMed entries only)
        """
        return self.source == 'PM'

    def get_external_url(self):
        """
        Return the PubMed URL of the reference, or None when the source
        is not PubMed
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the identifier is valid, raises ValidationError
        (keyed by field name) otherwise, including when source has no
        registered pattern.
        """
        id_source_validator = Bibliography.id_source_validators.get(source)
        if id_source_validator is None:
            # report an unknown source as a validation problem rather than
            # letting a KeyError escape from clean()
            raise ValidationError(
                dict(
                    source=_("Unknown bibliographic source")
                )
            )
        if not id_source_validator.match(id_source):
            raise ValidationError(
                dict(
                    # interpolate AFTER the translation lookup, so that the
                    # msgid is the constant template and not the formatted text
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
127

Hervé  MENAGER's avatar
Hervé MENAGER committed
128

129
class Taxonomy(AutoFillableModel):
    """
    An NCBI Taxonomy entry: its numeric identifier
    together with the organism name shown to users
    """
    taxonomy_id = models.DecimalField(
        'NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(
        'Organism name',
        max_length=200,
    )

    def autofill(self):
        """
        look the taxonomy ID up with get_taxonomy_info
        and store the scientific name it reports
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']

    def __str__(self):
        return self.name

    class Meta:
        verbose_name_plural = "taxonomies"
Hervé  MENAGER's avatar
Hervé MENAGER committed
151

Hervé  MENAGER's avatar
Hervé MENAGER committed
152

153
class MolecularFunction(AutoFillableModel):
    """
    A Gene Ontology molecular function: its GO identifier
    together with the description shown to users
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        'Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(
        'description',
        max_length=500,
    )

    def autofill(self):
        """
        look the GO ID up with get_go_info
        and store the label it reports as the description
        """
        self.description = get_go_info(self.go_id)['label']

    @property
    def name(self):
        # GO identifier and description, separated by a single space
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
177

178
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions/Domains if needed

        Besides setting the fields, this saves the protein itself, because
        the many-to-many links can only be added once the row exists.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']

        gene_names = info['gene_names']
        # start with whatever name comes first
        self.gene_name = gene_names[0]['name']
        # then prefer the primary one, if present
        for gene_name in gene_names:
            if gene_name["type"] == "primary":
                self.gene_name = gene_name["name"]
                break

        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # save now: molecular_functions/domains below need a saved protein
        super(Protein, self).save()

        for go_id in info['molecular_functions']:
            try:
                mol_function = MolecularFunction.objects.get(go_id=go_id)
            except MolecularFunction.DoesNotExist:
                mol_function = MolecularFunction()
                mol_function.go_id = go_id
                mol_function.save(autofill=True)
            self.molecular_functions.add(mol_function)

        for domain_id in info['domains']:
            try:
                domain = Domain.objects.get(pfam_acc=domain_id)
            # a missing domain raises Domain.DoesNotExist; catching
            # MolecularFunction.DoesNotExist here never created it
            except Domain.DoesNotExist:
                domain = Domain()
                domain.pfam_acc = domain_id
                domain.save(autofill=True)
            self.domains.add(domain)

    def is_autofill_done(self):
        """
        The protein is considered filled once it has a gene name
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
246

247
class Domain(AutoFillableModel):
    """
    A protein domain, identified by its Pfam accession
    """
    pfam_acc = models.CharField(
        'Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        'Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        'Pfam Description',
        max_length=100,
    )
    # TODO: what is this field? check database contents
    domain_family = models.CharField(
        'Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    def autofill(self):
        """
        look the Pfam accession up with get_pfam_info
        and store the identifier and description it reports
        """
        pfam_entry = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_entry['id']
        self.pfam_description = pfam_entry['description']

    @property
    def name(self):
        # the Pfam family identifier doubles as the display name
        return self.pfam_id

    def __str__(self):
        parts = (self.pfam_acc, self.pfam_id, self.pfam_description)
        return '{} ({}-{})'.format(*parts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
275

276
class ProteinDomainComplex(models.Model):
    """
    Links a protein to one of its domains
    """
    protein = models.ForeignKey(
        'Protein',
        models.CASCADE,
    )
    domain = models.ForeignKey(
        'Domain',
        models.CASCADE,
    )
    ppc_copy_nb = models.IntegerField(
        'Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # raw foreign key values: no extra query is needed to display a complex
        ids = (self.protein_id, self.domain_id)
        return '{}-{}'.format(*ids)

    def name(self):
        # a complex is named after the short name of its protein
        protein = self.protein
        return protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
293

294

295
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    A protein/domain complex playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
305
306


307
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    A protein/domain complex playing the "partner complex" role;
    it adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
314

Hervé  MENAGER's avatar
Hervé MENAGER committed
315

316
class Symmetry(models.Model):
    """
    A PPI symmetry: a short code and its description
    """
    code = models.CharField(
        'Symmetry code',
        max_length=2,
    )
    description = models.CharField(
        'Description',
        max_length=300,
    )

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        parts = (self.code, self.description)
        return '{} ({})'.format(*parts)

329
330

class Disease(models.Model):
    """
    A disease, identified by its (unique) name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(
        'Disease',
        max_length=30,
        unique=True,
    )

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
338

Hervé  MENAGER's avatar
Hervé MENAGER committed
339
class PpiFamily(models.Model):
    """
    A family of PPIs, identified by its (unique) name
    """
    name = models.CharField(
        'Name',
        max_length=30,
        unique=True,
    )

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
350

Hervé  MENAGER's avatar
Hervé MENAGER committed
351

352
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """
        The PPI is considered filled once it has a non-empty name.

        The name is nullable, so both None and "" mean "not filled yet".
        Once a name is stored, call save(autofill=True) to recompute it
        after the complexes of the PPI have changed.
        """
        return bool(self.name)

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins:
        the bound proteins, comma-separated, followed by ' / ' and the
        remaining (partner) proteins when there are any.

        Names are sorted so that the result does not depend on set
        iteration order.
        """
        all_protein_names = {
            ppi_complex.complex.protein.short_name for ppi_complex in self.ppicomplex_set.all()}
        bound_protein_names = {
            ppi_complex.complex.protein.short_name for ppi_complex in self.get_ppi_bound_complexes()}
        # partners are the proteins that do not appear in any bound complex
        partner_protein_names = all_protein_names - bound_protein_names
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
392

Hervé  MENAGER's avatar
Hervé MENAGER committed
393

Hervé  MENAGER's avatar
Hervé MENAGER committed
394
class PpiComplex(models.Model):
    """
    Association of a protein/domain complex with a PPI
    """
    ppi = models.ForeignKey(
        Ppi,
        models.CASCADE,
    )
    complex = models.ForeignKey(
        ProteinDomainComplex,
        models.CASCADE,
    )
    cc_nb = models.IntegerField(
        verbose_name=_('cc_nb_verbose_name'),
        default=1,
    )

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        parts = (self.ppi, self.complex, self.cc_nb)
        return 'PPI {}, Complex {} ({})'.format(*parts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
411

412
class CompoundManager(models.Manager):
    """
    CompoundManager adds automatically a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        Return the default queryset with all the annotations below added.

        The annotations are chained one after the other and some of them
        refer to earlier ones (lipinsky_score reads the four lipinsky_*
        flags, veber/veber_hba_hbd read hba_hbd), so their order matters.
        """
        # @formatter:off
        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity (highest activity over the compound's activity results)
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # with LE: 1.37 * best activity / number of non-hydrogen atoms
        qs = qs.annotate(le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # with LLE: best activity - AlogP
        qs = qs.annotate(lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky global: the four flags above are cast to integers and summed,
        # the compound passes when at least 3 of them are true
        qs = qs.annotate(lipinsky_score=Cast(F('lipinsky_mw'), IntegerField())+Cast(F('lipinsky_hba'), IntegerField())+ \
            Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()))
        qs = qs.annotate(lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField()))
        # Veber hba_hbd (<=12), hba_hbd being the sum of acceptors and donors
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=Case(When(hba_hbd__lte=12, then=True), default=False, output_field=BooleanField()))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=Case(When(tpsa__lte=140, then=True), default=False, output_field=BooleanField()))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=Case(When(nb_rotatable_bonds__lte=10, then=True), default=False, output_field=BooleanField()))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa))
        # the thresholds are repeated here instead of combining the three flags above;
        # previous, disabled attempt kept for reference:
        #qs = qs.annotate(veber=F('veber_rb').bitand(F('veber_hba_hbd').bitor(F('veber_tpsa'))))
        qs = qs.annotate(veber=Case(When(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))), then=True), default=False, output_field=BooleanField()))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=Case(When(a_log_p__lte=3, then=True), default=False, output_field=BooleanField()))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=Case(When(tpsa__gte=75, then=True), default=False, output_field=BooleanField()))
        # Pfizer global (AlogP and TPSA)
        # thresholds repeated here as well; previous, disabled attempt kept for reference:
        #qs = qs.annotate(pfizer=F('pfizer_a_log_p').bitand(F('pfizer_tpsa')))
        qs = qs.annotate(pfizer=Case(When(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)), then=True), default=False, output_field=BooleanField()))
        # PDB ligand available: 1/0 per compound action, Max() keeps 1 if any action has a ligand id
        qs = qs.annotate(pdb_ligand_av=Cast(Max(Case(When(compoundaction__ligand_id__isnull=False, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # NOTE(review): unlike the *_av annotations below, the three role flags
        # are not wrapped in Max() - confirm that this is intended
        # inhibition role
        qs = qs.annotate(inhibition_role=Case(When(compoundactivityresult__modulation_type='I', then=True), default=False, output_field=BooleanField()))
        # binding role
        qs = qs.annotate(binding_role=Case(When(compoundactivityresult__modulation_type='B', then=True), default=False, output_field=BooleanField()))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=Case(When(compoundactivityresult__modulation_type='S', then=True), default=False, output_field=BooleanField()))
        # cellular tests performed
        qs = qs.annotate(celltest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_type='CELL', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='I', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='S', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # binding tests performed
        qs = qs.annotate(bindtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='B', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # pharmacokinetic tests performed
        qs = qs.annotate(pktest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__pharmacokinetic=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # cytotoxicity tests performed
        qs = qs.annotate(cytoxtest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__cytotox=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # in silico study performed
        qs = qs.annotate(insilico_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__in_silico=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
485

486
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
487
488
489
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
490
    objects = CompoundManager()
491
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
492
        verbose_name='Canonical Smiles',
493
494
        unique=True,
    )
495
496
497
498
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
499
    aromatic_ratio = models.DecimalField(
500
501
502
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
503
504
        blank=True,
        null=True,
505
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
506
    balaban_index = models.DecimalField(
507
508
509
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
510
511
        blank=True,
        null=True,
512
513
514
515
516
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
517
518
        blank=True,
        null=True,
519
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
520
    gc_molar_refractivity = models.DecimalField(
521
522
523
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
524
525
        blank=True,
        null=True,
526
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
527
    log_d = models.DecimalField(
528
529
530
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
531
532
        blank=True,
        null=True,
533
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
534
    a_log_p = models.DecimalField(
535
536
537
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
538
539
        blank=True,
        null=True,
540
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
541
    mean_atom_vol_vdw = models.DecimalField(
542
543
544
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
545
546
        blank=True,
        null=True,
547
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
548
    molecular_weight = models.DecimalField(
549
550
551
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
552
553
        blank=True,
        null=True,
554
555
556
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
557
558
        blank=True,
        null=True,
559
560
561
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
562
563
        blank=True,
        null=True,
564
565
566
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
567
568
        blank=True,
        null=True,
569
570
571
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
572
573
        blank=True,
        null=True,
574
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
575
    nb_aromatic_sssr = models.IntegerField(
576
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
577
578
        blank=True,
        null=True,
579
580
581
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
582
583
        blank=True,
        null=True,
584
585
586
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
587
588
        blank=True,
        null=True,
589
590
591
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
592
593
        blank=True,
        null=True,
594
595
596
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
597
598
        blank=True,
        null=True,
599
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
600
    nb_bonds_non_h = models.IntegerField(
601
        verbose_name='Number of bonds not involving a hydrogen',
602
603
        blank=True,
        null=True,
604
605
606
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
607
608
        blank=True,
        null=True,
609
610
611
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
612
613
        blank=True,
        null=True,
614
615
616
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
617
618
        blank=True,
        null=True,
619
620
621
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
622
623
        blank=True,
        null=True,
624
625
626
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
627
628
        blank=True,
        null=True,
629
630
631
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
632
633
        blank=True,
        null=True,
634
635
636
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
637
638
        blank=True,
        null=True,
639
640
641
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
642
643
        blank=True,
        null=True,
644
645
646
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
647
648
        blank=True,
        null=True,
649
650
651
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
652
653
        blank=True,
        null=True,
654
655
656
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
657
658
        blank=True,
        null=True,
659
660
661
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
662
663
        blank=True,
        null=True,
664
665
666
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
667
668
        blank=True,
        null=True,
669
670
671
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
672
673
        blank=True,
        null=True,
674
675
676
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
677
678
        blank=True,
        null=True,
679
680
681
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
682
683
        blank=True,
        null=True,
684
685
686
    )
    inchi = models.TextField(
        verbose_name='InChi',
687
688
        blank=True,
        null=True,
689
690
691
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
692
693
        blank=True,
        null=True,
694
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
695
    randic_index = models.DecimalField(
696
697
698
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
699
700
        blank=True,
        null=True,
701
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
702
    rdf070m = models.DecimalField(
703
704
705
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
706
707
        blank=True,
        null=True,
708
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
709
    rotatable_bond_fraction = models.DecimalField(
710
711
712
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
713
714
        blank=True,
        null=True,
715
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
716
    sum_atom_polar = models.DecimalField(
717
718
719
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
720
721
        blank=True,
        null=True,
722
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
723
    sum_atom_vol_vdw = models.DecimalField(
724
725
726
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
727
728
        blank=True,
        null=True,
729
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
730
    tpsa = models.DecimalField(
731
732
733
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
734
735
        blank=True,
        null=True,
736
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
737
    ui = models.DecimalField(
738
739
740
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
741
742
        blank=True,
        null=True,
743
744
745
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
746
747
        blank=True,
        null=True,
748
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
749
    common_name = models.CharField(
750
751
752
753
754
755
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
756
    pubchem_id = models.CharField(
757
758
759
760
761
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
762
    chemspider_id = models.CharField(
763
764
765
766
767
768
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
769
    chembl_id = models.CharField(
770
771
772
773
774
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
775
    iupac_name = models.CharField(
776
777
778
779
780
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
781

782
    class Meta:
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
783
        ordering = ['id']
784

785
786
787
    def compute_drugbank_compound_similarity(self):
        """
        Compute and store the Tanimoto similarity to existing DrugBank compounds

        The canonical SMILES of this compound is compared to the canonical
        SMILES of every DrugBankCompound, using the fingerprint type named by
        the DRUGBANK_FINGERPRINTS setting ("FP2" when the setting is absent).
        The 15 highest scores are kept and inserted as DrugbankCompoundTanimoto
        rows in a single bulk insert.
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound - confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all DrugBank compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best scores only, best first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. insert the results; the DrugBank compound is referenced through its
        # primary key so that no extra query is issued per retained match
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
803
804
805
    @property
    def biblio_refs(self):
        """
        RefCompoundBiblio entries filtered on this compound
        """
        refs_for_compound = RefCompoundBiblio.objects.filter(compound=self)
        return refs_for_compound
809

810
811
812
813
814
815
816
817
818
819
820
821
822
    @property
    def pfam_ids(self):
        """
        Set of the PFAM ids of the domains of the bound complexes in the
        PPIs this compound has an action on
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # called on every action before its PPI is read; the return value is not used
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

823
    @property
    def compound_action_ligand_ids(self):
        """
        Set of the ligand ids of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
832

833
834
    @property
    def best_pXC50_activity(self):
        """
        Maximum of the 'activity' values over the activity results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
836
837
838
839
840
841

    @property
    def best_pXC50_compound_activity_result(self):
        """
        First activity result whose activity equals the best pXC50 activity,
        None when there is no best pXC50 activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching_results = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching_results[0]
        return None
843

844
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI attached to the activity result holding the best
        PXC50 activity, None when there is no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        return None if top_result is None else top_result.test_activity_description.ppi.name

855
856
857
858
859
860
861
862
863
864
865
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI attached to the activity result holding
        the best PXC50 activity, None when there is no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            targeted_ppi = top_result.test_activity_description.ppi
            return targeted_ppi.family.name
        return None

866
867
868
869
870
871
872
873
874
875
876
877
878
879
    @property
    def bioch_tests_count(self):
        """
        Number of activity results of this compound whose test type is 'BIOCH'
        (biochemical tests)
        """
        biochemical_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='BIOCH',
        )
        return biochemical_results.count()

    @property
    def cell_tests_count(self):
        """
        Number of activity results of this compound whose test type is 'CELL'
        (cell tests)
        """
        cell_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='CELL',
        )
        return cell_results.count()

880
881
882
883
884
885
    @property
    def families(self):
        """
        List of the distinct PPI families of the PPIs targeted by the
        compound actions of this compound
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
886

887
888
889
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugBank similarity entries of this compound ordered by decreasing
        Tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
890

891
892
893
    def autofill(self):
        """
        Derive the InChi and the InChiKey from the canonical SMILES, then
        compute the similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        self.compute_drugbank_compound_similarity()
896

897
898
899
    def __str__(self):
        """Label made of the compound id, e.g. 'Compound #12'"""
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
900

901
902
903
class CompoundTanimoto(models.Model):
    """
    Tanimoto value stored for a canonical SMILES, a fingerprint and a compound
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(to=Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # one row at most per SMILES, fingerprint and compound
        unique_together = (('canonical_smiles', 'fingerprint', 'compound'),)
912

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
913

914
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is computed when CompoundTanimoto already holds rows for this
    SMILES query and this fingerprint.

    :param smiles_query: SMILES compared to the canonical SMILES of each compound
    :param fingerprint: fingerprint type, handed over to FingerPrinter and stored with each result
    """
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query,
        fingerprint=fingerprint,
    ).exists()
    if already_computed:
        return
    smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    # the compound is referenced through its primary key so that no extra
    # query is issued per compound
    CompoundTanimoto.objects.bulk_create([
        CompoundTanimoto(
            canonical_smiles=smiles_query,
            fingerprint=fingerprint,
            compound_id=id_,
            tanimoto=tanimoto_dict[id_],
        )
        for id_ in smiles_dict
    ])
930

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
931

932
class PcaBiplotData(models.Model):