models.py 40 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
12
13
14
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
19
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, get_pfam_info
20

Hervé  MENAGER's avatar
Hervé MENAGER committed
21

22
23
24
25
26
27
28
29
30
31
32
33
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be filled in automatically
    from an external source.

    Subclasses implement autofill() to set their own fields, and may
    override is_autofill_done() to report whether that already happened.
    save() runs autofill() first when it is called with autofill=True, or
    when is_autofill_done() returns False.
    """

    class Meta:
        abstract = True

    def is_autofill_done(self):
        """Tell save() whether autofill() can be skipped (True by default)."""
        return True

    def autofill(self):
        """Fill the instance fields; has to be provided by subclasses."""
        raise NotImplementedError()

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() beforehand when requested
        or needed.

        The optional ``autofill`` keyword argument is consumed here and is
        never forwarded to the parent save().
        """
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super(AutoFillableModel, self).save(*args, **kwargs)

46
47

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)
    """
    # (code, label) pairs used as choices for the `source` field
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI ID')
    )
    # One compiled pattern per source code; an `id_source` value is accepted
    # when the pattern of the selected source matches it.
    id_source_validators = dict(
        PM=re.compile("^[0-9]+$"),
        PT=re.compile("^.*$"),
        DO=re.compile("^.*$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default='PM')
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed or Google patents)
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        else:
            # NOTE(review): every non-PubMed source, 'DO' included, goes
            # through the Google patents lookup - confirm this is intended
            # for DOI references.
            info = get_google_patent_info(self.id_source)
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """Autofill is considered done as soon as a title is set."""
        return len(self.title) > 0

    def clean(self):
        """Run the parent validation, then check id_source against source."""
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """Return True when get_external_url() can build a link (PubMed only)."""
        return self.source == 'PM'

    def get_external_url(self):
        """Return the PubMed URL of the reference, or None for other sources."""
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check that `id_source` matches the pattern registered for `source`.

        Returns True when it matches. Raises ValidationError (keyed on the
        `id_source` field) when it does not, and KeyError when `source` has
        no entry in id_source_validators.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            raise ValidationError(
                dict(
                    # Interpolate AFTER the translation lookup: formatting the
                    # string first would make the msgid depend on the pattern
                    # and it would never be found in the message catalog.
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
122

Hervé  MENAGER's avatar
Hervé MENAGER committed
123

124
class Taxonomy(AutoFillableModel):
    """
    An NCBI Taxonomy identifier stored together with the matching
    human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        Set the organism name from the 'scientific_name' entry returned by
        the external lookup (NCBI Entrez) for this taxonomy ID
        """
        record = get_taxonomy_info(self.taxonomy_id)
        self.name = record['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
146

Hervé  MENAGER's avatar
Hervé MENAGER committed
147

148
class MolecularFunction(AutoFillableModel):
    """
    A Gene Ontology molecular function identifier stored together with
    the matching human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """GO identifier and description, separated by a single space"""
        return ' '.join((self.go_id, self.description))

    def autofill(self):
        """
        Set the description from the 'label' entry returned by the
        external lookup (EBI OLS) for this GO identifier
        """
        term = get_go_info(self.go_id)
        self.description = term['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
172

173
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)

    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions if needed

        Side effect: the protein itself is written to the database in the
        middle of this method, because the many-to-many relation to
        MolecularFunction can only be filled once the row exists.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']
        self.gene_name = info['gene']
        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # Skip AutoFillableModel.save() on purpose and go straight to the
        # Django implementation: AutoFillableModel.save() calls autofill()
        # again whenever is_autofill_done() is False, which is the case when
        # the fetched gene name is empty, and would then recurse endlessly.
        super(AutoFillableModel, self).save()
        for go_id in info['molecular_functions']:
            try:
                mol_function = MolecularFunction.objects.get(go_id=go_id)
            except MolecularFunction.DoesNotExist:
                mol_function = MolecularFunction()
                mol_function.go_id = go_id
                mol_function.save(autofill=True)
            self.molecular_functions.add(mol_function)

    def is_autofill_done(self):
        """Autofill is considered done as soon as a gene name is set."""
        return len(self.gene_name) > 0

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
220

221
class Domain(AutoFillableModel):
    """
    A protein domain, described with information from PFAM
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        verbose_name='Pfam Description',
        max_length=100,
    )
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
    )

    # TODO: what is this field? check database
    # contents

    def __str__(self):
        return '{acc} ({ident}-{description})'.format(
            acc=self.pfam_acc,
            ident=self.pfam_id,
            description=self.pfam_description,
        )

    @property
    def name(self):
        """The Pfam family identifier doubles as the domain name"""
        return self.pfam_id

    def autofill(self):
        """
        Set the Pfam identifier and description from the 'id' and
        'description' entries returned by the external lookup (PFAM)
        for this accession
        """
        entry = get_pfam_info(self.pfam_acc)
        self.pfam_id = entry['id']
        self.pfam_description = entry['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
249

250
class ProteinDomainComplex(models.Model):
    """
    Association between a Protein and a Domain
    """
    protein = models.ForeignKey(to='Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey(to='Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        """Short name of the associated protein (a plain method, not a property)"""
        return self.protein.short_name

    def __str__(self):
        # built from the raw foreign key values, so no related row is loaded
        return '{protein}-{domain}'.format(
            protein=self.protein_id,
            domain=self.domain_id,
        )
Hervé  MENAGER's avatar
Hervé MENAGER committed
267

268

269
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    A Protein-Domain association playing the "bound complex" role
    """
    # the verbose name is a translation key resolved through ugettext_lazy
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
279
280


281
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    A Protein-Domain association playing the "partner complex" role;
    it declares no field of its own
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
288

Hervé  MENAGER's avatar
Hervé MENAGER committed
289

290
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{code} ({description})'.format(
            code=self.code,
            description=self.description,
        )

303
304

class Disease(models.Model):
    """
    A disease, identified by its unique name
    """
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    # is there any database/nomenclature for diseases?

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
312

Hervé  MENAGER's avatar
Hervé MENAGER committed
313
class PpiFamily(models.Model):
    """
    A family of PPIs, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
324

Hervé  MENAGER's avatar
Hervé MENAGER committed
325

326
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def autofill(self):
        """Recompute the stored name from the names of the proteins involved."""
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins.

        The result is the comma-separated names of the bound proteins,
        followed by ' / ' and the comma-separated names of the remaining
        (partner) proteins when there are any. Names are de-duplicated and
        sorted within each group, so the same set of complexes always
        produces the same stored name.
        """
        # select_related fetches complex and protein in the same query
        # instead of issuing two extra queries per PpiComplex row.
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        partner_protein_names = all_protein_names - bound_protein_names
        # Sets have no defined iteration order: sort before joining so the
        # denormalized name does not change from one run to the next.
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
363

Hervé  MENAGER's avatar
Hervé MENAGER committed
364

Hervé  MENAGER's avatar
Hervé MENAGER committed
365
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the Protein-Domain complexes it involves
    """
    ppi = models.ForeignKey(to=Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(to=ProteinDomainComplex, on_delete=models.CASCADE)
    # the verbose name is a translation key resolved through ugettext_lazy
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI {ppi}, Complex {complex} ({count})'.format(
            ppi=self.ppi,
            complex=self.complex,
            count=self.cc_nb,
        )

Hervé  MENAGER's avatar
Hervé MENAGER committed
382

383
class CompoundManager(models.Manager):
    """
    Manager whose querysets automatically carry a number of annotations,
    used for filters and compound card
    """

    @staticmethod
    def _flag(*condition, **lookups):
        """
        Boolean expression that is True for rows matching the given Q
        object or field lookups, and False otherwise
        """
        return Case(
            When(*condition, then=True, **lookups),
            default=False,
            output_field=BooleanField(),
        )

    @staticmethod
    def _any_related(**lookups):
        """
        Boolean expression built as the Max of a 1/0 Case over the given
        lookups, cast to a boolean
        """
        return Cast(
            Max(Case(When(then=1, **lookups), default=0, output_field=IntegerField())),
            BooleanField(),
        )

    @staticmethod
    def _as_int(annotation):
        """Integer cast of a previously added annotation"""
        return Cast(F(annotation), IntegerField())

    def get_queryset(self):
        """
        Return the default queryset with every annotation below applied.

        Annotations are added one annotate() call at a time and in the
        listed order, because several of them refer to earlier ones
        (lipinsky_score, lipinsky, veber_hba_hbd, veber).
        """
        activity = 'compoundactivityresult__activity'
        flag = self._flag
        any_related = self._any_related
        as_int = self._as_int

        annotations = (
            # number of publications
            ('pubs', Count('refcompoundbiblio', distinct=True)),
            # best activity
            ('best_activity', Max(activity)),
            # LE
            ('le', Cast(1.37 * Max(activity) / F('nb_atom_non_h'), FloatField())),
            # LLE
            ('lle', Cast(Max(activity) - F('a_log_p'), FloatField())),

            # Lipinsky MW (<=500)
            ('lipinsky_mw', flag(molecular_weight__lte=500)),
            # Lipinsky hba (<=10)
            ('lipinsky_hba', flag(nb_acceptor_h__lte=10)),
            # Lipinsky hbd (<=5)
            ('lipinsky_hbd', flag(nb_donor_h__lte=5)),
            # Lipinsky a_log_p (<=5)
            ('lipinsky_a_log_p', flag(a_log_p__lte=5)),
            # Lipinsky global: number of criteria met, at least 3 required
            (
                'lipinsky_score',
                as_int('lipinsky_mw') + as_int('lipinsky_hba')
                + as_int('lipinsky_hbd') + as_int('lipinsky_a_log_p'),
            ),
            ('lipinsky', flag(lipinsky_score__gte=3)),

            # Veber hba_hbd (<=12)
            ('hba_hbd', F('nb_acceptor_h') + F('nb_donor_h')),
            ('veber_hba_hbd', flag(hba_hbd__lte=12)),
            # Veber TPSA (<=140)
            ('veber_tpsa', flag(tpsa__lte=140)),
            # Veber Rotatable Bonds (<=10)
            ('veber_rb', flag(nb_rotatable_bonds__lte=10)),
            # Veber global (Rotatable bonds and (hba_hbd or tpsa)); the
            # thresholds are tested again here rather than combining the
            # veber_* flags above
            (
                'veber',
                flag(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140)))),
            ),

            # Pfizer AlogP (<=3)
            ('pfizer_a_log_p', flag(a_log_p__lte=3)),
            # Pfizer TPSA (>=75)
            ('pfizer_tpsa', flag(tpsa__gte=75)),
            # Pfizer global (AlogP and TPSA), thresholds tested again here
            ('pfizer', flag(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)))),

            # PDB ligand available
            ('pdb_ligand_av', any_related(compoundaction__ligand_id__isnull=False)),

            # NOTE(review): unlike the *_av annotations below, the three
            # role flags are not aggregated with Max - confirm that this
            # is intended.
            # inhibition role
            ('inhibition_role', flag(compoundactivityresult__modulation_type='I')),
            # binding role
            ('binding_role', flag(compoundactivityresult__modulation_type='B')),
            # stabilisation role
            ('stabilisation_role', flag(compoundactivityresult__modulation_type='S')),

            # cellular tests performed
            ('celltest_av', any_related(compoundactivityresult__test_activity_description__test_type='CELL')),
            # inhibition tests performed
            ('inhitest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='I')),
            # stabilisation tests performed
            ('stabtest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='S')),
            # binding tests performed
            ('bindtest_av', any_related(compoundactivityresult__test_activity_description__test_modulation_type='B')),
            # pharmacokinetic tests performed
            ('pktest_av', any_related(refcompoundbiblio__bibliography__pharmacokinetic=True)),
            # cytotoxicity tests performed
            ('cytoxtest_av', any_related(refcompoundbiblio__bibliography__cytotox=True)),
            # in silico study performed
            ('insilico_av', any_related(refcompoundbiblio__bibliography__in_silico=True)),

            # number of tests available
            ('tests_av', Count('compoundactivityresult', distinct=True)),
        )

        qs = super().get_queryset()
        for alias, expression in annotations:
            qs = qs.annotate(**{alias: expression})
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
456

457
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
458
459
460
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
461
    objects = CompoundManager()
462
    canonical_smile = models.TextField(
463
464
465
        verbose_name='Canonical Smile',
        unique=True,
    )
466
467
468
469
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
470
    aromatic_ratio = models.DecimalField(
471
472
473
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
474
475
        blank=True,
        null=True,
476
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
477
    balaban_index = models.DecimalField(
478
479
480
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
481
482
        blank=True,
        null=True,
483
484
485
486
487
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
488
489
        blank=True,
        null=True,
490
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
491
    gc_molar_refractivity = models.DecimalField(
492
493
494
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
495
496
        blank=True,
        null=True,
497
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
498
    log_d = models.DecimalField(
499
500
501
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
502
503
        blank=True,
        null=True,
504
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
505
    a_log_p = models.DecimalField(
506
507
508
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
509
510
        blank=True,
        null=True,
511
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
512
    mean_atom_vol_vdw = models.DecimalField(
513
514
515
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
516
517
        blank=True,
        null=True,
518
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
519
    molecular_weight = models.DecimalField(
520
521
522
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
523
524
        blank=True,
        null=True,
525
526
527
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
528
529
        blank=True,
        null=True,
530
531
532
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
533
534
        blank=True,
        null=True,
535
536
537
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
538
539
        blank=True,
        null=True,
540
541
542
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
543
544
        blank=True,
        null=True,
545
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
546
    nb_aromatic_sssr = models.IntegerField(
547
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
548
549
        blank=True,
        null=True,
550
551
552
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
553
554
        blank=True,
        null=True,
555
556
557
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
558
559
        blank=True,
        null=True,
560
561
562
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
563
564
        blank=True,
        null=True,
565
566
567
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
568
569
        blank=True,
        null=True,
570
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
571
    nb_bonds_non_h = models.IntegerField(
572
        verbose_name='Number of bonds not involving a hydrogen',
573
574
        blank=True,
        null=True,
575
576
577
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
578
579
        blank=True,
        null=True,
580
581
582
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
583
584
        blank=True,
        null=True,
585
586
587
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
588
589
        blank=True,
        null=True,
590
591
592
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
593
594
        blank=True,
        null=True,
595
596
597
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
598
599
        blank=True,
        null=True,
600
601
602
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
603
604
        blank=True,
        null=True,
605
606
607
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
608
609
        blank=True,
        null=True,
610
611
612
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
613
614
        blank=True,
        null=True,
615
616
617
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
618
619
        blank=True,
        null=True,
620
621
622
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
623
624
        blank=True,
        null=True,
625
626
627
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
628
629
        blank=True,
        null=True,
630
631
632
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
633
634
        blank=True,
        null=True,
635
636
637
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
638
639
        blank=True,
        null=True,
640
641
642
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
643
644
        blank=True,
        null=True,
645
646
647
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
648
649
        blank=True,
        null=True,
650
651
652
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
653
654
        blank=True,
        null=True,
655
656
657
    )
    inchi = models.TextField(
        verbose_name='InChi',
658
659
        blank=True,
        null=True,
660
661
662
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
663
664
        blank=True,
        null=True,
665
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
666
    # Optional numeric descriptors. Decimal fields keep two decimal places;
    # max_digits bounds the total number of stored digits.
    randic_index = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=4,
        verbose_name='Randic index')
    rdf070m = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=5,
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å')
    rotatable_bond_fraction = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=3,
        verbose_name='Fraction of rotatable bonds')
    sum_atom_polar = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=5,
        verbose_name='Sum of atomic polarizabilities')
    sum_atom_vol_vdw = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=6,
        verbose_name='Sum of atom volumes computed with VdW radii')
    tpsa = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=5,
        verbose_name='Topological Polar Surface Area (TPSA)')
    ui = models.DecimalField(
        null=True, blank=True, decimal_places=2, max_digits=4,
        verbose_name='Unsaturation index')
    wiener_index = models.IntegerField(
        null=True, blank=True, verbose_name='Wiener index')
Hervé  MENAGER's avatar
Hervé MENAGER committed
720
    # Optional names and external database identifiers.
    # common_name and chemspider_id are additionally declared unique.
    common_name = models.CharField(
        null=True, blank=True, max_length=20, unique=True,
        verbose_name='Common name')
    pubchem_id = models.CharField(
        null=True, blank=True, max_length=10,
        verbose_name='Pubchem ID')
    chemspider_id = models.CharField(
        null=True, blank=True, max_length=10, unique=True,
        verbose_name='Chemspider ID')
    chembl_id = models.CharField(
        null=True, blank=True, max_length=30,
        verbose_name='Chembl ID')
    iupac_name = models.CharField(
        null=True, blank=True, max_length=255,
        verbose_name='IUPAC name')
752

753
    class Meta:
        # default queryset ordering: ascending id
        ordering = ['id']
755

756
757
758
    def compute_drugbank_compound_similarity(self):
        """
        Compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and the 15 highest
        scores are stored as DrugbankCompoundTanimoto rows in one bulk insert.
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound - confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all DrugBank compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep only the 15 highest similarities
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them; the foreign key is set through its "_id" attribute so
        #    that no DrugBankCompound has to be re-fetched for each row
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
774
775
776
    @property
    def biblio_refs(self):
        """
        RefCompoundBiblio entries whose compound is this compound
        """
        refs_for_compound = RefCompoundBiblio.objects.filter(compound=self)
        return refs_for_compound
780

781
782
783
784
785
786
787
788
789
790
791
792
793
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the bound complexes
        of the PPIs targeted by this compound's actions
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # return value is not used here; call kept as is
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

794
    @property
    def compound_action_ligand_ids(self):
        """
        set of the ligand_id values of this compound's compound actions
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
803

804
805
    @property
    def best_pXC50_activity(self):
        """
        maximum 'activity' over the compound activity results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
807
808
809
810
811
812

    @property
    def best_pXC50_compound_activity_result(self):
        """
        first compound activity result whose activity equals the best
        pXC50 activity, or None when there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
814

815
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI tested in the best pXC50 activity result,
        None when the compound has no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            return top_result.test_activity_description.ppi.name
        return None

826
827
828
829
830
831
832
833
834
835
836
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI tested in the best pXC50 activity
        result, None when the compound has no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            return top_result.test_activity_description.ppi.family.name
        return None

837
838
839
840
841
842
843
844
845
846
847
848
849
850
    @property
    def bioch_tests_count(self):
        """
        number of activity results whose test description has type 'BIOCH'
        """
        bioch_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='BIOCH')
        return bioch_results.count()

    @property
    def cell_tests_count(self):
        """
        number of activity results whose test description has type 'CELL'
        """
        cell_results = self.compoundactivityresult_set.filter(
            test_activity_description__test_type='CELL')
        return cell_results.count()

851
852
853
854
855
856
    @property
    def families(self):
        """
        list of the distinct PPI families of the PPIs targeted by
        this compound's compound actions (order not defined)
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
857

858
859
860
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugbankCompoundTanimoto entries of this compound, highest tanimoto first
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
861

862
863
864
    def autofill(self):
        """
        Fill inchi and inchikey from the canonical SMILES, then compute
        the similarity to the DrugBank compounds
        """
        smiles = self.canonical_smile
        # InChi and InChiKey are both derived from the same SMILES
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        # this call saves the compound itself before storing the similarities
        self.compute_drugbank_compound_similarity()
867

868
869
870
    def __str__(self):
        """
        'Compound #' followed by the compound id
        """
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
871

872
873
874
class CompoundTanimoto(models.Model):
    """
    Tanimoto value between a SMILES query and a compound,
    for a given fingerprint
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(to=Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        decimal_places=4,
        max_digits=5,
    )

    class Meta:
        # a single flat tuple of field names: one uniqueness constraint
        # over the three fields together
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
883

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
884

885
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is done when CompoundTanimoto already holds at least one row for
    this (smiles_query, fingerprint) pair.

    :param smiles_query: SMILES compared to the canonical SMILES of every Compound
    :param fingerprint: fingerprint name passed to FingerPrinter and stored with each row
    """
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query, fingerprint=fingerprint).exists()
    if already_computed:
        return
    smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    #    the foreign key is set through "compound_id" so that no Compound
    #    has to be re-fetched for each row
    CompoundTanimoto.objects.bulk_create([
        CompoundTanimoto(canonical_smiles=smiles_query, fingerprint=fingerprint,
                         compound_id=id_, tanimoto=tanimoto_dict[id_])
        for id_ in smiles_dict
    ])
901

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
902

903
class PcaBiplotData(models.Model):
    """
    Storage for the PCA biplot
    all the data is kept as one JSON text in one row of the table
    """
    pca_biplot_data = models.TextField(
        verbose_name='PCA biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
910

911

912
class LeLleBiplotData(models.Model):
    """
    Storage for the LE-LLE biplot
    all the data is kept as one JSON text in one row of the table
    """
    le_lle_biplot_data = models.TextField(
        verbose_name='LE-LLE biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
919

920

921
class CellLine(models.Model):
    """
    A cell line, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', unique=True, max_length=50)

    def __str__(self):
        cell_line_name = self.name
        return cell_line_name

Hervé  MENAGER's avatar
Hervé MENAGER committed
930

931
class TestActivityDescription(models.Model):
Hervé  MENAGER's avatar
Hervé MENAGER committed
932
933
934
    """
    Activity test descriptions
    """