models.py 39.7 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
12
13
14
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
19
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, get_pfam_info
20

Hervé  MENAGER's avatar
Hervé MENAGER committed
21

22
23
24
25
26
27
28
29
30
31
32
33
34
35
class AutoFillableModel(models.Model):
    """
    Abstract base class for models whose fields can be populated from an
    external source.

    Subclasses provide an autofill() method. Calling save(autofill=True)
    runs autofill() before the instance is written; with any other value,
    or without the keyword, the instance is saved as it is.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        # 'autofill' is our own switch: always strip it so that it is never
        # forwarded to the parent save()
        autofill_requested = kwargs.pop('autofill', None)
        # only the literal True triggers the autofill, other truthy values
        # do not
        if autofill_requested is True:
            self.autofill()
        super().save(*args, **kwargs)
39
40
41


class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI ID')
    )
    # pattern that id_source has to match, keyed by source code
    # (used by validate_source_id)
    id_source_validators = dict(
        PM=re.compile("^[0-9]+$"),
        PT=re.compile("^.*$"),
        DO=re.compile("^.*$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default='PM')
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed or Google patents)
        """
        # NOTE(review): every source other than 'PM' -- 'DO' included -- is
        # looked up through the Google patents service; confirm this is the
        # intended behaviour for DOI references
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        else:
            info = get_google_patent_info(self.id_source)
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def clean(self):
        """
        model-level validation: id_source must be valid for the selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        tell whether get_external_url() returns a URL (PubMed entries only)
        """
        return self.source == 'PM'

    def get_external_url(self):
        """
        return the PubMed URL of the reference, None for the other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)

    @staticmethod
    def validate_source_id(id_source, source):
        """
        check id_source against the pattern registered for source

        returns True when the identifier matches, raises a ValidationError
        keyed on the offending field otherwise
        """
        id_source_validator = Bibliography.id_source_validators.get(source)
        if id_source_validator is None:
            # an unknown source is a validation problem, not a KeyError
            raise ValidationError(
                dict(
                    source=_("Unknown bibliographic source")
                )
            )
        if not id_source_validator.match(id_source):
            # interpolate AFTER the translation lookup: the message id has
            # to be the constant string, otherwise it is never found in
            # the translation catalogue
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
113

Hervé  MENAGER's avatar
Hervé MENAGER committed
114

115
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy identifier
    together with the human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        set the organism name from the scientific name returned
        by the external taxonomy lookup (NCBI Entrez)
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
137

Hervé  MENAGER's avatar
Hervé MENAGER committed
138

139
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function
    together with its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(verbose_name='description', max_length=500)

    def autofill(self):
        """
        set the description from the label returned
        by the external ontology lookup (EBI OLS)
        """
        self.description = get_go_info(self.go_id)['label']

    @property
    def name(self):
        # GO id and description separated by a single space
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
163

164
class Protein(AutoFillableModel):
    """
    Protein described by its Uniprot entry,
    linked to its organism and molecular functions
    """
    uniprot_id = models.CharField(
        verbose_name='Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(
        verbose_name='Gene name', unique=True, max_length=30)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', on_delete=models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)

    def autofill(self):
        """
        fill the protein from its Uniprot record, creating the Taxonomy
        and MolecularFunction rows that do not exist yet

        the protein itself is saved half-way through, before the
        molecular functions are attached to it
        """
        uniprot_record = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = uniprot_record['recommended_name']
        self.gene_name = uniprot_record['gene']
        self.entry_name = uniprot_record['entry_name']
        self.short_name = uniprot_record['short_name']

        # organism: reuse the known taxonomy or create (and autofill) it
        organism_id = uniprot_record['organism']
        try:
            organism = Taxonomy.objects.get(taxonomy_id=organism_id)
        except Taxonomy.DoesNotExist:
            organism = Taxonomy(taxonomy_id=organism_id)
            organism.save(autofill=True)
        self.organism = organism

        # save now, before the molecular functions are added below
        super().save()

        for function_id in uniprot_record['molecular_functions']:
            try:
                function = MolecularFunction.objects.get(go_id=function_id)
            except MolecularFunction.DoesNotExist:
                function = MolecularFunction(go_id=function_id)
                function.save(autofill=True)
            self.molecular_functions.add(function)

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
208

209
class Domain(AutoFillableModel):
    """
    Protein domain, described by its PFAM entry
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(
        verbose_name='Pfam Description', max_length=100)
    domain_family = models.CharField(
        verbose_name='Domain family', max_length=25)

    # TODO: what is this field? check database
    # contents

    def autofill(self):
        """
        set the Pfam identifier and description from the values returned
        by the external lookup (PFAM) for this accession
        """
        pfam_record = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_record['id']
        self.pfam_description = pfam_record['description']

    @property
    def name(self):
        # the Pfam family identifier doubles as the display name
        return self.pfam_id

    def __str__(self):
        label_parts = (self.pfam_acc, self.pfam_id, self.pfam_description)
        return '{} ({}-{})'.format(*label_parts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
237

238
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex')

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        # plain method (not a property): the short name of the protein
        protein = self.protein
        return protein.short_name

    def __str__(self):
        # built from the raw foreign key values
        return '{}-{}'.format(self.protein_id, self.domain_id)
Hervé  MENAGER's avatar
Hervé MENAGER committed
255

256

257
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name='Number of copies of the protein in the pocket',
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
266
267


268
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role

    (adds no field of its own to ProteinDomainComplex)
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
275

Hervé  MENAGER's avatar
Hervé MENAGER committed
276

277
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        label_parts = (self.code, self.description)
        return '{} ({})'.format(*label_parts)

290
291

class Disease(models.Model):
    """
    Disease, identified by its (unique) name
    """
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    # is there any database/nomenclature for diseases?

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
299

Hervé  MENAGER's avatar
Hervé MENAGER committed
300
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its (unique) name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
311

Hervé  MENAGER's avatar
Hervé MENAGER committed
312

313
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        # this is the less efficient query ever seen, FIXME
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        build the PPI name from the short names of its proteins

        format: the bound protein names joined with ',', followed by
        ' / ' and the partner protein names (joined with ',') when there
        is at least one partner. Partners are the proteins of the PPI
        complexes that are not bound complexes.
        """
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.all()
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes()
        }
        partner_protein_names = all_protein_names - bound_protein_names
        # sets have no stable iteration order (string hashing is randomized
        # per process): sort the names so that the stored name is always
        # the same for the same PPI
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        if partner_str != '':
            return bound_str + ' / ' + partner_str
        return bound_str
351

Hervé  MENAGER's avatar
Hervé MENAGER committed
352

Hervé  MENAGER's avatar
Hervé MENAGER committed
353
class PpiComplex(models.Model):
    """
    Participation of a protein-domain complex in a PPI
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    cc_nb = models.IntegerField(
        verbose_name='Number of copies of the complex in the PPI',
        default=1,
    )

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        label_parts = (self.ppi, self.complex, self.cc_nb)
        return 'PPI {}, Complex {} ({})'.format(*label_parts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
368

369
class CompoundManager(models.Manager):
    """
    CompoundManager adds automatically a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        return the default queryset with all the computed annotations:
        publication/test counts, best activity, LE/LLE, the Lipinsky, Veber
        and Pfizer rule flags, and the "available data" flags

        the annotations are chained in order: later ones (lipinsky_score,
        lipinsky, veber_hba_hbd, veber) refer to earlier ones by name
        """
        # @formatter:off
        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # with LE: 1.37 * best activity / number of non hydrogen atoms
        qs = qs.annotate(le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # with LLE: best activity - ALogP
        qs = qs.annotate(lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky global: number of criteria met, the rule passes with at least 3 out of 4
        qs = qs.annotate(lipinsky_score=Cast(F('lipinsky_mw'), IntegerField())+Cast(F('lipinsky_hba'), IntegerField())+ \
            Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()))
        qs = qs.annotate(lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField()))
        # Veber hba_hbd (<=12)
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=Case(When(hba_hbd__lte=12, then=True), default=False, output_field=BooleanField()))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=Case(When(tpsa__lte=140, then=True), default=False, output_field=BooleanField()))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=Case(When(nb_rotatable_bonds__lte=10, then=True), default=False, output_field=BooleanField()))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa))
        # the thresholds are repeated here instead of combining the three veber_* flags above
        #qs = qs.annotate(veber=F('veber_rb').bitand(F('veber_hba_hbd').bitor(F('veber_tpsa'))))
        qs = qs.annotate(veber=Case(When(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))), then=True), default=False, output_field=BooleanField()))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=Case(When(a_log_p__lte=3, then=True), default=False, output_field=BooleanField()))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=Case(When(tpsa__gte=75, then=True), default=False, output_field=BooleanField()))
        # Pfizer global (AlogP and TPSA)
        # the thresholds are repeated here instead of combining the two pfizer_* flags above
        #qs = qs.annotate(pfizer=F('pfizer_a_log_p').bitand(F('pfizer_tpsa')))
        qs = qs.annotate(pfizer=Case(When(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)), then=True), default=False, output_field=BooleanField()))
        # PDB ligand available: true if at least one compound action has a ligand id
        qs = qs.annotate(pdb_ligand_av=Cast(Max(Case(When(compoundaction__ligand_id__isnull=False, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # NOTE(review): the three *_role annotations below are not wrapped in
        # an aggregate, unlike the *_av annotations -- presumably evaluated
        # per joined activity result rather than per compound; confirm
        # inhibition role
        qs = qs.annotate(inhibition_role=Case(When(compoundactivityresult__modulation_type='I', then=True), default=False, output_field=BooleanField()))
        # binding role
        qs = qs.annotate(binding_role=Case(When(compoundactivityresult__modulation_type='B', then=True), default=False, output_field=BooleanField()))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=Case(When(compoundactivityresult__modulation_type='S', then=True), default=False, output_field=BooleanField()))
        # cellular tests performed
        qs = qs.annotate(celltest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_type='CELL', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='I', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='S', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # binding tests performed
        qs = qs.annotate(bindtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='B', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # pharmacokinetic tests performed (flag of a linked bibliography)
        qs = qs.annotate(pktest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__pharmacokinetic=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # cytotoxicity tests performed (flag of a linked bibliography)
        qs = qs.annotate(cytoxtest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__cytotox=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # in silico study performed (flag of a linked bibliography)
        qs = qs.annotate(insilico_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__in_silico=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
442

443
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
444
445
446
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
447
    objects = CompoundManager()
448
    canonical_smile = models.TextField(
449
450
451
452
453
454
        verbose_name='Canonical Smile',
        unique=True,
    )
    is_macrocycle = models.BooleanField(
        verbose_name='Contains one or more macrocycles',
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
455
    aromatic_ratio = models.DecimalField(
456
457
458
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
459
460
        blank=True,
        null=True,
461
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
462
    balaban_index = models.DecimalField(
463
464
465
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
466
467
        blank=True,
        null=True,
468
469
470
471
472
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
473
474
        blank=True,
        null=True,
475
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
476
    gc_molar_refractivity = models.DecimalField(
477
478
479
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
480
481
        blank=True,
        null=True,
482
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
483
    log_d = models.DecimalField(
484
485
486
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
487
488
        blank=True,
        null=True,
489
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
490
    a_log_p = models.DecimalField(
491
492
493
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
494
495
        blank=True,
        null=True,
496
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
497
    mean_atom_vol_vdw = models.DecimalField(
498
499
500
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
501
502
        blank=True,
        null=True,
503
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
504
    molecular_weight = models.DecimalField(
505
506
507
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
508
509
        blank=True,
        null=True,
510
511
512
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
513
514
        blank=True,
        null=True,
515
516
517
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
518
519
        blank=True,
        null=True,
520
521
522
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
523
524
        blank=True,
        null=True,
525
526
527
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
528
529
        blank=True,
        null=True,
530
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
531
    nb_aromatic_sssr = models.IntegerField(
532
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
533
534
        blank=True,
        null=True,
535
536
537
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
538
539
        blank=True,
        null=True,
540
541
542
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
543
544
        blank=True,
        null=True,
545
546
547
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
548
549
        blank=True,
        null=True,
550
551
552
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
553
554
        blank=True,
        null=True,
555
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
556
    nb_bonds_non_h = models.IntegerField(
557
        verbose_name='Number of bonds not involving a hydrogen',
558
559
        blank=True,
        null=True,
560
561
562
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
563
564
        blank=True,
        null=True,
565
566
567
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
568
569
        blank=True,
        null=True,
570
571
572
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
573
574
        blank=True,
        null=True,
575
576
577
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
578
579
        blank=True,
        null=True,
580
581
582
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
583
584
        blank=True,
        null=True,
585
586
587
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
588
589
        blank=True,
        null=True,
590
591
592
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
593
594
        blank=True,
        null=True,
595
596
597
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
598
599
        blank=True,
        null=True,
600
601
602
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
603
604
        blank=True,
        null=True,
605
606
607
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
608
609
        blank=True,
        null=True,
610
611
612
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
613
614
        blank=True,
        null=True,
615
616
617
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
618
619
        blank=True,
        null=True,
620
621
622
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
623
624
        blank=True,
        null=True,
625
626
627
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
628
629
        blank=True,
        null=True,
630
631
632
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
633
634
        blank=True,
        null=True,
635
636
637
    )
    # --- counts and structure identifiers (all optional) ---
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
        null=True, blank=True,
    )
    # inchi and inchikey are filled from the canonical SMILES by autofill()
    inchi = models.TextField(
        verbose_name='InChi',
        null=True, blank=True,
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
        null=True, blank=True,
    )

    # --- numeric descriptors (all optional, two decimal places) ---
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4, decimal_places=2,
        null=True, blank=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5, decimal_places=2,
        null=True, blank=True,
    )
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3, decimal_places=2,
        null=True, blank=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5, decimal_places=2,
        null=True, blank=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6, decimal_places=2,
        null=True, blank=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5, decimal_places=2,
        null=True, blank=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4, decimal_places=2,
        null=True, blank=True,
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
        null=True, blank=True,
    )

    # --- names and external database identifiers (all optional) ---
    # common_name and chemspider_id carry a uniqueness constraint
    common_name = models.CharField(
        verbose_name='Common name',
        max_length=20,
        unique=True,
        null=True, blank=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10,
        null=True, blank=True,
    )
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        max_length=10,
        unique=True,
        null=True, blank=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30,
        null=True, blank=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255,
        null=True, blank=True,
    )
737

738
    class Meta:
        # default queryset ordering: ascending `id`
        ordering = ['id']
740

741
742
743
    def compute_drugbank_compound_similarity(self):
        """
        Compute the Tanimoto similarity to existing DrugBank compounds

        The compound is saved first, then the (at most) 15 DrugBank compounds
        with the highest Tanimoto value are stored as DrugbankCompoundTanimoto
        rows in a single bulk insert.
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound by primary key -- confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all DrugBank compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep only the 15 most similar DrugBank compounds
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. insert the results; the DrugBank compound is referenced through
        # its id (drugbank_compound_id) so that no extra query is issued per row
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
759
760
761
    @property
    def biblio_refs(self):
        """
        RefCompoundBiblio queryset restricted to the entries
        whose compound is this one
        """
        related_refs = RefCompoundBiblio.objects.filter(compound=self)
        return related_refs
765

766
767
768
769
770
771
772
773
774
775
776
777
778
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the bound complexes
        of the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # NOTE(review): the result is discarded; kept because possible
            # side effects of get_complexes() are not visible here
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

779
    @property
Hervé  MENAGER's avatar
Hervé MENAGER committed
780
    def compound_action_ligand_ids(self):
        """
        set of the `ligand_id` values of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
788

789
790
    @property
    def best_pXC50_activity(self):
        """
        Highest `activity` value among the activity results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
792
793
794
795
796
797

    @property
    def best_pXC50_compound_activity_result(self):
        """
        First activity result whose activity equals the best pXC50 activity,
        or None when the compound has no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
799

800
    @property
801
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI corresponding to the best PXC50 activity

        Returns None when the compound has no activity result, or when the
        test activity description of the best result has no PPI set.
        """
        best_activity_car = self.best_pXC50_compound_activity_result
        if best_activity_car is None:
            return None
        ppi = best_activity_car.test_activity_description.ppi
        # TestActivityDescription.ppi is a nullable foreign key
        if ppi is None:
            return None
        return ppi.name

811
812
813
814
815
816
817
818
819
820
821
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Family of the PPI corresponding to the best PXC50 activity

        Returns None when the compound has no activity result, when the test
        activity description of the best result has no PPI set, or when that
        PPI has no family.
        """
        best_activity_car = self.best_pXC50_compound_activity_result
        if best_activity_car is None:
            return None
        ppi = best_activity_car.test_activity_description.ppi
        # TestActivityDescription.ppi is a nullable foreign key
        if ppi is None:
            return None
        family = ppi.family
        # NOTE(review): whether Ppi.family can be empty is not visible from
        # here; guarded defensively -- confirm against the Ppi model
        if family is None:
            return None
        return family.name

822
823
824
825
826
827
828
829
830
831
832
833
834
835
    @property
    def bioch_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the 'BIOCH' test type
        """
        biochemical_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='BIOCH',
        )
        return biochemical_results.count()

    @property
    def cell_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the 'CELL' test type
        """
        cellular_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='CELL',
        )
        return cellular_results.count()

836
837
838
839
840
841
    @property
    def families(self):
        """
        list of the distinct PPI families of the PPIs targeted by
        the compound actions of this compound (in no particular order)
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
842

843
844
845
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugBank similarity entries of this compound,
        ordered by decreasing Tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
846

847
848
849
    def autofill(self):
        """
        Fill the InChi and InChiKey from the canonical SMILES,
        then compute the similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        # compute InChi and InChiKey
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        # the similarity computation also saves the compound
        self.compute_drugbank_compound_similarity()
852

853
854
855
    def __str__(self):
        return 'Compound #{}'.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
856

857
858
859
class CompoundTanimoto(models.Model):
    """
    Tanimoto value between a query SMILES and a compound,
    for a given fingerprint
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # at most one row per (query SMILES, fingerprint, compound)
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
868

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
869

870
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is done when CompoundTanimoto already holds at least one row for
    this (SMILES, fingerprint) pair.

    :param smiles_query: SMILES to compare with every compound
    :param fingerprint: fingerprint identifier passed to FingerPrinter
    """
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query,
        fingerprint=fingerprint,
    ).exists()
    if already_computed:
        return
    smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    # the compound is referenced through its id (compound_id) so that no
    # extra query is issued per row
    cts = [
        CompoundTanimoto(
            canonical_smiles=smiles_query,
            fingerprint=fingerprint,
            compound_id=id_,
            tanimoto=tanimoto_dict[id_],
        )
        for id_ in smiles_dict
    ]
    CompoundTanimoto.objects.bulk_create(cts)
886

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
887

888
class PcaBiplotData(models.Model):
    """
    Storage for the PCA biplot
    (all the data is kept as one JSON text in one row of the table)
    """
    pca_biplot_data = models.TextField(
        verbose_name='PCA biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
895

896

897
class LeLleBiplotData(models.Model):
    """
    Storage for the LE-LLE biplot
    (all the data is kept as one JSON text in one row of the table)
    """
    le_lle_biplot_data = models.TextField(
        verbose_name='LE-LLE biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
904

905

906
class CellLine(models.Model):
    """
    Cell line, identified by a unique name
    """
    name = models.CharField(
        verbose_name='Name',
        unique=True,
        max_length=50,
    )

    def __str__(self):
        """
        The cell line is displayed by its name
        """
        label = self.name
        return label

Hervé  MENAGER's avatar
Hervé MENAGER committed
915

916
class TestActivityDescription(models.Model):
Hervé  MENAGER's avatar
Hervé MENAGER committed
917
918
919
    """
    Activity test descriptions
    """
920
921
922
923
924
925
926
927
928
    TEST_TYPES = (
        ('BIOCH', 'Biochemical assay'),
        ('CELL', 'Cellular assay')
    )
    TEST_MODULATION_TYPES = (
        ('B', 'Binding'),
        ('I', 'Inhibition'),
        ('S', 'Stabilization')
    )
929
930
931
932
    PROTEIN_BOUND_CONSTRUCTS = (
        ('F', 'Full length'),
        ('U', 'Unspecified')
    )
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
933
934
935
936
    biblio = models.ForeignKey(
        Bibliography,
        on_delete=models.CASCADE,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
937
    protein_domain_bound_complex = models.ForeignKey(
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
938
939
940
        ProteinDomainBoundComplex,
        on_delete=models.CASCADE,
    )
941
    ppi = models.ForeignKey(Ppi, models.CASCADE, blank=True, null=True)