models.py 40.3 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
12
13
14
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
19
20
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
21

Hervé  MENAGER's avatar
Hervé MENAGER committed
22

23
24
25
26
27
28
29
30
31
32
33
34
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be filled from external sources.

    Subclasses implement autofill() to populate their fields. save() runs
    autofill() first when it is called with autofill=True, or when
    is_autofill_done() reports that the instance has not been filled yet.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() beforehand when required.

        The optional ``autofill`` keyword argument is consumed here and is
        never forwarded to the parent save().
        """
        # only the literal True forces a refill; any other value is ignored
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super().save(*args, **kwargs)

    def autofill(self):
        """Populate the fields from the external source (subclass hook)."""
        raise NotImplementedError()

    def is_autofill_done(self):
        """Tell whether autofill can be skipped; True unless overridden."""
        return True

47
48

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # One compiled pattern per SOURCES code. Raw strings keep the regex
    # escapes intact, and the dot of the "10." DOI prefix is escaped so that
    # it only matches a literal dot.
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI, depending on the source)

        Raises NotImplementedError when the source code is not handled.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled as soon as it has a title
        (an empty or missing title means autofill still has to run)
        """
        return bool(self.title)

    def clean(self):
        """
        Model-level validation: the ID must match the pattern of its source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Only PubMed references have an external URL
        """
        return self.source == 'PM'

    def get_external_url(self):
        """
        Return the PubMed URL of the reference, or None for other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the ID matches, raises ValidationError (attached to
        the id_source field) otherwise. A source code missing from
        id_source_validators raises KeyError.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            # interpolate AFTER the translation lookup, so that the msgid is
            # the constant template and not the already formatted message
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
127

Hervé  MENAGER's avatar
Hervé MENAGER committed
128

129
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy identifier of an organism,
    stored together with its human-readable name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def autofill(self):
        """
        set the organism name from the taxonomy web service result
        (its 'scientific_name' entry)
        """
        taxonomy_info = get_taxonomy_info(self.taxonomy_id)
        self.name = taxonomy_info['scientific_name']

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
151

Hervé  MENAGER's avatar
Hervé MENAGER committed
152

153
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function,
    stored together with its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(verbose_name='description', max_length=500)

    def autofill(self):
        """
        set the description from the GO web service result
        (its 'label' entry)
        """
        go_info = get_go_info(self.go_id)
        self.description = go_info['label']

    @property
    def name(self):
        """
        GO identifier and description, separated by a single space
        """
        return ' '.join((self.go_id, self.description))

    def __str__(self):
        return self.description

Hervé  MENAGER's avatar
Hervé MENAGER committed
177

178
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)

    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions if needed

        Side effect: the protein itself is written to the database from
        here, because the molecular functions can only be attached to a
        protein that already has a primary key.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']
        self.gene_name = info['gene']
        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # Deliberately skip AutoFillableModel.save() here: it would call
        # autofill() again whenever is_autofill_done() is still False (e.g.
        # Uniprot returned an empty gene name), recursing without end.
        super(AutoFillableModel, self).save()
        for go_id in info['molecular_functions']:
            try:
                mol_function = MolecularFunction.objects.get(go_id=go_id)
            except MolecularFunction.DoesNotExist:
                mol_function = MolecularFunction()
                mol_function.go_id = go_id
                mol_function.save(autofill=True)
            self.molecular_functions.add(mol_function)

    def is_autofill_done(self):
        """
        The protein is considered filled as soon as it has a gene name
        (an empty or missing gene name means autofill still has to run)
        """
        return bool(self.gene_name)

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
225

226
class Domain(AutoFillableModel):
    """
    Protein domain, described by its PFAM accession, identifier
    and description
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        verbose_name='Pfam Description',
        max_length=100,
    )
    # TODO: what is this field? check database contents
    # (autofill() below does not set it)
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
    )

    def autofill(self):
        """
        set the PFAM identifier and description from the PFAM web service
        result (its 'id' and 'description' entries)
        """
        pfam_info = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_info['id']
        self.pfam_description = pfam_info['description']

    @property
    def name(self):
        """
        The PFAM family identifier is used as the domain name
        """
        return self.pfam_id

    def __str__(self):
        return '%s (%s-%s)' % (self.pfam_acc, self.pfam_id, self.pfam_description)

Hervé  MENAGER's avatar
Hervé MENAGER committed
254

255
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # built from the raw foreign key values, no related object is loaded
        return '%s-%s' % (self.protein_id, self.domain_id)

    def name(self):
        """
        Short name of the associated protein
        (a plain method, it has to be called)
        """
        return self.protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
272

273

274
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    # the verbose name is a translation key, resolved lazily
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name=_('ppp_copy_nb_per_p'),
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
284
285


286
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role
    (adds no field of its own to ProteinDomainComplex)
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
293

Hervé  MENAGER's avatar
Hervé MENAGER committed
294

295
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '%s (%s)' % (self.code, self.description)

308
309

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
317

Hervé  MENAGER's avatar
Hervé MENAGER committed
318
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
329

Hervé  MENAGER's avatar
Hervé MENAGER committed
330

331
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """
        autofill is skipped by save() only when the name is the empty string
        """
        # NOTE(review): this reads as inverted (an empty name would normally
        # mean "not filled yet"); as written, save() recomputes the name
        # whenever it is None or non-empty. Left unchanged - confirm intent.
        return self.name == ""

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        build the PPI name from the short names of its proteins:
        bound proteins first, then, when there are any, ' / ' followed by the
        partner proteins; names are comma-separated within each group

        Names are sorted so that the stored name is reproducible: joining an
        unordered set of strings may yield a different order on each run.
        """
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.all()
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes()
        }
        # partners are all the proteins that are not part of a bound complex
        partner_protein_names = all_protein_names - bound_protein_names
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
371

Hervé  MENAGER's avatar
Hervé MENAGER committed
372

Hervé  MENAGER's avatar
Hervé MENAGER committed
373
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    # the verbose name is a translation key, resolved lazily
    cc_nb = models.IntegerField(
        default=1,
        verbose_name=_('cc_nb_verbose_name'),
    )

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI %s, Complex %s (%s)' % (self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
390

391
class CompoundManager(models.Manager):
    """
    Manager whose querysets always carry the computed annotations
    used by the filters and by the compound card
    """

    def get_queryset(self):
        """
        return the default queryset with every annotation applied,
        one annotate() call per annotation, in declaration order
        """
        # @formatter:off
        def flag(*conditions, **lookups):
            # boolean annotation: True when the condition holds, False otherwise
            return Case(When(*conditions, then=True, **lookups), default=False, output_field=BooleanField())

        def any_related(**lookups):
            # boolean annotation: Max() over a 1/0 CASE on the lookup, cast to boolean
            return Cast(Max(Case(When(then=1, **lookups), default=0, output_field=IntegerField())), BooleanField())

        # Order matters: some annotations refer to earlier ones
        # (lipinsky_score, lipinsky, veber_hba_hbd, veber).
        annotations = [
            # number of publications
            ('pubs', Count('refcompoundbiblio', distinct=True)),
            # best activity
            ('best_activity', Max('compoundactivityresult__activity')),
            # LE
            ('le', Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField())),
            # LLE
            ('lle', Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField())),
            # Lipinsky MW (<=500)
            ('lipinsky_mw', flag(molecular_weight__lte=500)),
            # Lipinsky hba (<=10)
            ('lipinsky_hba', flag(nb_acceptor_h__lte=10)),
            # Lipinsky hbd (<=5)
            ('lipinsky_hbd', flag(nb_donor_h__lte=5)),
            # Lipinsky a_log_p (<=5)
            ('lipinsky_a_log_p', flag(a_log_p__lte=5)),
            # Lipinsky global: number of criteria met, then "at least 3"
            (
                'lipinsky_score',
                Cast(F('lipinsky_mw'), IntegerField()) + Cast(F('lipinsky_hba'), IntegerField())
                + Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()),
            ),
            ('lipinsky', flag(lipinsky_score__gte=3)),
            # Veber hba_hbd (<=12)
            ('hba_hbd', F('nb_acceptor_h') + F('nb_donor_h')),
            ('veber_hba_hbd', flag(hba_hbd__lte=12)),
            # Veber TPSA (<=140)
            ('veber_tpsa', flag(tpsa__lte=140)),
            # Veber Rotatable Bonds (<=10)
            ('veber_rb', flag(nb_rotatable_bonds__lte=10)),
            # Veber global (Rotatable bonds and (hba_hbd or tpsa)), written
            # with the raw conditions rather than the flags annotated above
            ('veber', flag(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))))),
            # Pfizer AlogP (<=3)
            ('pfizer_a_log_p', flag(a_log_p__lte=3)),
            # Pfizer TPSA (>=75)
            ('pfizer_tpsa', flag(tpsa__gte=75)),
            # Pfizer global (AlogP and TPSA), also written with the raw conditions
            ('pfizer', flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))),
            # PDB ligand available
            ('pdb_ligand_av', any_related(compoundaction__ligand_id__isnull=False)),
            # inhibition role
            ('inhibition_role', flag(compoundactivityresult__modulation_type='I')),
            # binding role
            ('binding_role', flag(compoundactivityresult__modulation_type='B')),
            # stabilisation role
            ('stabilisation_role', flag(compoundactivityresult__modulation_type='S')),
            # cellular tests performed
            ('celltest_av', any_related(compoundactivityresult__test_activity_description__test_type='CELL')),
            # inhibition tests performed
            ('inhitest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='I')),
            # stabilisation tests performed
            ('stabtest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='S')),
            # binding tests performed
            ('bindtest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='B')),
            # pharmacokinetic tests performed
            ('pktest_av', any_related(refcompoundbiblio__bibliography__pharmacokinetic=True)),
            # cytotoxicity tests performed
            ('cytoxtest_av', any_related(refcompoundbiblio__bibliography__cytotox=True)),
            # in silico study performed
            ('insilico_av', any_related(refcompoundbiblio__bibliography__in_silico=True)),
            # number of tests available
            ('tests_av', Count('compoundactivityresult', distinct=True)),
        ]
        qs = super().get_queryset()
        for annotation_name, expression in annotations:
            qs = qs.annotate(**{annotation_name: expression})
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
464

465
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
466
467
468
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
469
    objects = CompoundManager()
470
    canonical_smile = models.TextField(
471
472
473
        verbose_name='Canonical Smile',
        unique=True,
    )
474
475
476
477
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
478
    aromatic_ratio = models.DecimalField(
479
480
481
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
482
483
        blank=True,
        null=True,
484
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
485
    balaban_index = models.DecimalField(
486
487
488
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
489
490
        blank=True,
        null=True,
491
492
493
494
495
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
496
497
        blank=True,
        null=True,
498
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
499
    gc_molar_refractivity = models.DecimalField(
500
501
502
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
503
504
        blank=True,
        null=True,
505
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
506
    log_d = models.DecimalField(
507
508
509
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
510
511
        blank=True,
        null=True,
512
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
513
    a_log_p = models.DecimalField(
514
515
516
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
517
518
        blank=True,
        null=True,
519
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
520
    mean_atom_vol_vdw = models.DecimalField(
521
522
523
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
524
525
        blank=True,
        null=True,
526
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
527
    molecular_weight = models.DecimalField(
528
529
530
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
531
532
        blank=True,
        null=True,
533
534
535
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
536
537
        blank=True,
        null=True,
538
539
540
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
541
542
        blank=True,
        null=True,
543
544
545
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
546
547
        blank=True,
        null=True,
548
549
550
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
551
552
        blank=True,
        null=True,
553
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
554
    nb_aromatic_sssr = models.IntegerField(
555
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
556
557
        blank=True,
        null=True,
558
559
560
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
561
562
        blank=True,
        null=True,
563
564
565
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
566
567
        blank=True,
        null=True,
568
569
570
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
571
572
        blank=True,
        null=True,
573
574
575
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
576
577
        blank=True,
        null=True,
578
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
579
    nb_bonds_non_h = models.IntegerField(
580
        verbose_name='Number of bonds not involving a hydrogen',
581
582
        blank=True,
        null=True,
583
584
585
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
586
587
        blank=True,
        null=True,
588
589
590
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
591
592
        blank=True,
        null=True,
593
594
595
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
596
597
        blank=True,
        null=True,
598
599
600
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
601
602
        blank=True,
        null=True,
603
604
605
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
606
607
        blank=True,
        null=True,
608
609
610
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
611
612
        blank=True,
        null=True,
613
614
615
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
616
617
        blank=True,
        null=True,
618
619
620
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
621
622
        blank=True,
        null=True,
623
624
625
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
626
627
        blank=True,
        null=True,
628
629
630
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
631
632
        blank=True,
        null=True,
633
634
635
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
636
637
        blank=True,
        null=True,
638
639
640
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
641
642
        blank=True,
        null=True,
643
644
645
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
646
647
        blank=True,
        null=True,
648
649
650
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
651
652
        blank=True,
        null=True,
653
654
655
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
656
657
        blank=True,
        null=True,
658
659
660
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
661
662
        blank=True,
        null=True,
663
664
665
    )
    inchi = models.TextField(
        verbose_name='InChi',
666
667
        blank=True,
        null=True,
668
669
670
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
671
672
        blank=True,
        null=True,
673
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
674
    randic_index = models.DecimalField(
675
676
677
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
678
679
        blank=True,
        null=True,
680
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
681
    rdf070m = models.DecimalField(
682
683
684
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
685
686
        blank=True,
        null=True,
687
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
688
    rotatable_bond_fraction = models.DecimalField(
689
690
691
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
692
693
        blank=True,
        null=True,
694
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
695
    sum_atom_polar = models.DecimalField(
696
697
698
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
699
700
        blank=True,
        null=True,
701
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
702
    sum_atom_vol_vdw = models.DecimalField(
703
704
705
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
706
707
        blank=True,
        null=True,
708
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
709
    tpsa = models.DecimalField(
710
711
712
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
713
714
        blank=True,
        null=True,
715
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
716
    ui = models.DecimalField(
717
718
719
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
720
721
        blank=True,
        null=True,
722
723
724
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
725
726
        blank=True,
        null=True,
727
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
728
    common_name = models.CharField(
729
730
731
732
733
734
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
735
    pubchem_id = models.CharField(
736
737
738
739
740
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
741
    chemspider_id = models.CharField(
742
743
744
745
746
747
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
748
    chembl_id = models.CharField(
749
750
751
752
753
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
754
    iupac_name = models.CharField(
755
756
757
758
759
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
760

761
    class Meta:
        # default ordering applied to querysets of this model: ascending id
        ordering = ['id']
763

764
765
766
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and the 15 highest
        Tanimoto values are stored as DrugbankCompoundTanimoto rows.
        """
        # the rows created below point to this compound,
        # presumably the reason why it is saved first
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best values only
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. insert the results; the ids come from the query above, so they are
        # assigned directly to the foreign key instead of fetching each
        # DrugBankCompound again with one query per row
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
782
783
784
    @property
    def biblio_refs(self):
        """
        queryset of the RefCompoundBiblio entries whose compound is this one
        """
        related_refs = RefCompoundBiblio.objects.filter(compound=self)
        return related_refs
788

789
790
791
792
793
794
795
796
797
798
799
800
801
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the bound complexes
        of the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # NOTE(review): the returned value is not used here, the call is
            # kept as is in case it has side effects - to be confirmed
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

802
    @property
    def compound_action_ligand_ids(self):
        """
        set of the ligand ids of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
811

812
813
    @property
    def best_pXC50_activity(self):
        """
        highest 'activity' value among the compound activity results
        of this compound, as computed by a Max aggregation
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
815
816
817
818
819
820

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return a compound activity result holding the best pXC50 activity,
        or None if there is no such result
        """
        best_pXC50_activity = self.best_pXC50_activity
        if best_pXC50_activity is None:
            return None
        # first() instead of [0]: it returns None rather than raising an
        # IndexError if the matching row is no longer there when this second
        # query runs, and it picks the same row every time when several
        # results share the best activity
        return self.compoundactivityresult_set.filter(activity=best_pXC50_activity).first()
822

823
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        name of the PPI tested in the activity result holding the
        best pXC50 activity, None when there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.name

834
835
836
837
838
839
840
841
842
843
844
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        name of the family of the PPI tested in the activity result holding
        the best pXC50 activity, None when there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        if car is not None:
            return car.test_activity_description.ppi.family.name
        return None

845
846
847
848
849
850
851
852
853
854
855
856
857
858
    @property
    def bioch_tests_count(self):
        """
        number of compound activity results of this compound whose
        test activity description has the 'BIOCH' test type
        """
        bioch_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='BIOCH',
        )
        return bioch_results.count()

    @property
    def cell_tests_count(self):
        """
        number of compound activity results of this compound whose
        test activity description has the 'CELL' test type
        """
        cell_results = self.compoundactivityresult_set.all().filter(
            test_activity_description__test_type='CELL',
        )
        return cell_results.count()

859
860
861
862
863
864
    @property
    def families(self):
        """
        list of the distinct families of the PPIs targeted by the
        compound actions of this compound, in no particular order
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
865

866
867
868
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugbankCompoundTanimoto entries of this compound,
        ordered by decreasing tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
869

870
871
872
    def autofill(self):
        """
        fill inchi and inchikey from the canonical SMILES of the compound,
        then compute its similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        # compute InChi and InChiKey
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        self.compute_drugbank_compound_similarity()
875

876
877
878
    def __str__(self):
        """
        text representation of the compound, built from its id
        """
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
879

880
881
882
class CompoundTanimoto(models.Model):
    """
    Tanimoto value between a query SMILES and a compound,
    for a given fingerprint
    """
    canonical_smiles = models.TextField(
        verbose_name='Canonical Smile',
    )
    fingerprint = models.TextField(
        verbose_name='Fingerprint',
    )
    compound = models.ForeignKey(
        Compound,
        on_delete=models.CASCADE,
    )
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # one single constraint over the three fields
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
891

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
892

893
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is done if CompoundTanimoto already holds entries for this
    SMILES and this fingerprint.

    :param smiles_query: the SMILES to compare to the canonical SMILES of
        each compound
    :param fingerprint: the fingerprint, passed as is to FingerPrinter
    """
    # exists() only asks whether a row matches, it does not count them all
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query,
        fingerprint=fingerprint,
    ).exists()
    if already_computed:
        return
    smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    # the ids come from the query above, so they are assigned directly to the
    # foreign key instead of fetching each Compound again with one query per row
    CompoundTanimoto.objects.bulk_create([
        CompoundTanimoto(
            canonical_smiles=smiles_query,
            fingerprint=fingerprint,
            compound_id=id_,
            tanimoto=tanimoto_dict[id_],
        )
        for id_ in smiles_dict
    ])
909

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
910

911
class PcaBiplotData(models.Model):
    """
    Storage of the PCA biplot data
    all the data are kept as one JSON text in one row of the table
    """
    pca_biplot_data = models.TextField(
        verbose_name='PCA biplot JSON data',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
918

919

920
class LeLleBiplotData(models.Model):
    """
    Storage of the LE-LLE biplot data
    all the data are kept as one JSON text in one row of the table
    """
    le_lle_biplot_data = models.TextField(
        verbose_name='LE-LLE biplot JSON data',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
927

928

929
class CellLine(models.Model):