models.py 40.1 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
12
13
14
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
19
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, get_pfam_info
20

Hervé  MENAGER's avatar
Hervé MENAGER committed
21

22
23
24
25
26
27
28
29
30
31
32
33
class AutoFillableModel(models.Model):
    """
    Abstract base class for models whose fields can be populated from an
    external source through the autofill() method.

    save() accepts an extra ``autofill`` keyword argument: if it is set to
    True, autofill() is called before saving. autofill() is also called
    whenever is_autofill_done() reports that the instance is not filled yet.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() first when requested or needed.

        The ``autofill`` keyword is consumed here and never forwarded to
        Django's own save(), which does not know about it.
        """
        autofill_requested = kwargs.pop('autofill', None) is True
        if autofill_requested or not self.is_autofill_done():
            self.autofill()
        super(AutoFillableModel, self).save(*args, **kwargs)

    def autofill(self):
        """
        Populate the instance fields; concrete models must override this.
        """
        raise NotImplementedError()

    def is_autofill_done(self):
        """
        Tell save() whether autofill() can be skipped.

        The default answer is True, so autofill() only runs on explicit
        request unless a subclass overrides this method.
        """
        return True

46
47

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)
    """
    # (stored code, human-readable label) pairs for the `source` field
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI ID')
    )
    # one compiled pattern per source code, used by validate_source_id()
    id_source_validators = dict(
        PM=re.compile("^[0-9]+$"),
        PT=re.compile("^.*$"),
        DO=re.compile("^.*$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default='PM')
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed or Google patents)
        """
        # NOTE(review): every non-'PM' source, including 'DO' (DOI), is sent
        # to the Google patents lookup -- confirm this is intended for DOIs.
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        else:
            info = get_google_patent_info(self.id_source)
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled as soon as it has a title
        """
        return len(self.title) > 0

    def clean(self):
        """
        Run the default model validation, then check that id_source
        matches the pattern expected for the selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Only PubMed references have an external URL
        """
        return self.source == 'PM'

    def get_external_url(self):
        """
        Return the PubMed URL of the reference, or None for other sources
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the identifier matches; raises ValidationError
        (attached to the id_source field) otherwise. An unknown source code
        raises KeyError, as there is no pattern registered for it.
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            # Interpolate AFTER the translation lookup: the message template
            # is the catalog key, so formatting it first would make the
            # lookup miss every translation.
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
122

Hervé  MENAGER's avatar
Hervé MENAGER committed
123

124
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy entry: a TaxID together with the
    human-readable organism name it stands for
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        Set the organism name from the record returned by
        get_taxonomy_info() for this TaxID
        """
        taxonomy_record = get_taxonomy_info(self.taxonomy_id)
        self.name = taxonomy_record['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
146

Hervé  MENAGER's avatar
Hervé MENAGER committed
147

148
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function: a GO term ID together with
    its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID', unique=True, max_length=10)
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """
        GO ID and description, separated by a single space
        """
        go_id, description = self.go_id, self.description
        return go_id + ' ' + description

    def autofill(self):
        """
        Set the description from the label returned by
        get_go_info() for this GO ID
        """
        go_record = get_go_info(self.go_id)
        self.description = go_record['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
172

173
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)

    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions if needed

        Side effect: the protein itself is saved during this call, because
        the many-to-many molecular functions can only be attached to a
        row that already exists in the database.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']
        self.gene_name = info['gene']
        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # Save through the parent of AutoFillableModel rather than through
        # AutoFillableModel.save(): the latter calls autofill() again while
        # is_autofill_done() is False, which recursed without bound whenever
        # the fetched gene name was empty.
        super(AutoFillableModel, self).save()
        for go_id in info['molecular_functions']:
            try:
                mol_function = MolecularFunction.objects.get(go_id=go_id)
            except MolecularFunction.DoesNotExist:
                mol_function = MolecularFunction()
                mol_function.go_id = go_id
                mol_function.save(autofill=True)
            self.molecular_functions.add(mol_function)

    def is_autofill_done(self):
        """
        The protein is considered filled as soon as it has a gene name
        """
        return len(self.gene_name) > 0

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
220

221
class Domain(AutoFillableModel):
    """
    Protein domain, identified by its Pfam accession and
    described with the data fetched from PFAM
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession', max_length=10, unique=True)
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(
        verbose_name='Pfam Description', max_length=100)
    domain_family = models.CharField(
        verbose_name='Domain family', max_length=25)
    # TODO: what is this field? check database
    # contents

    def __str__(self):
        return '{} ({}-{})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)

    @property
    def name(self):
        """
        The Pfam family identifier is used as the domain name
        """
        return self.pfam_id

    def autofill(self):
        """
        Set the Pfam identifier and description from the record
        returned by get_pfam_info() for this accession
        """
        pfam_record = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_record['id']
        self.pfam_description = pfam_record['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
249

250
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and one of its domains
    """
    protein = models.ForeignKey(to='Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey(to='Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex')

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        """
        Short name of the protein involved in this complex
        """
        protein = self.protein
        return protein.short_name

    def __str__(self):
        # built from the raw foreign key values, so no related row is loaded
        return '{}-{}'.format(self.protein_id, self.domain_id)
Hervé  MENAGER's avatar
Hervé MENAGER committed
267

268

269
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "bound complex" role
    """
    # the verbose name is a translation key resolved lazily
    ppp_copy_nb_per_p = models.IntegerField(
        verbose_name=_('ppp_copy_nb_per_p'),
    )

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
279
280


281
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain complex playing the "partner complex" role;
    it adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
288

Hervé  MENAGER's avatar
Hervé MENAGER committed
289

290
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '{code} ({description})'.format(
            code=self.code,
            description=self.description,
        )

303
304

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', max_length=30, unique=True)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
312

Hervé  MENAGER's avatar
Hervé MENAGER committed
313
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its unique name
    """
    name = models.CharField(verbose_name='Name', max_length=30, unique=True)

    def __str__(self):
        return self.name

    class Meta:
        verbose_name_plural = "PPI Families"
Hervé  MENAGER's avatar
Hervé MENAGER committed
324

Hervé  MENAGER's avatar
Hervé MENAGER committed
325

326
class Ppi(AutoFillableModel):
    """
    PPI
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """
        The PPI is considered filled once it has a non-empty name.

        The previous test (name == "") was inverted: it reported an empty
        name as "done", so save() never computed it, while any non-empty
        name was reported as "not done".
        """
        return bool(self.name)

    def autofill(self):
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins.

        The name is the comma-separated bound protein names, followed by
        ' / ' and the comma-separated partner protein names when there are
        any. Partners are the proteins of this PPI that are not bound.
        Names are sorted so the result does not depend on set iteration
        order.
        """
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.all()
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes()
        }
        partner_protein_names = all_protein_names - bound_protein_names
        bound_str = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        name = bound_str
        if partner_str != '':
            name += ' / ' + partner_str
        return name
366

Hervé  MENAGER's avatar
Hervé MENAGER committed
367

Hervé  MENAGER's avatar
Hervé MENAGER committed
368
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the Protein-Domain complexes
    it involves
    """
    ppi = models.ForeignKey(to=Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(to=ProteinDomainComplex, on_delete=models.CASCADE)
    # the verbose name is a translation key resolved lazily
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    def __str__(self):
        template = 'PPI {}, Complex {} ({})'
        return template.format(self.ppi, self.complex, self.cc_nb)

    class Meta:
        verbose_name_plural = "Ppi complexes"

Hervé  MENAGER's avatar
Hervé MENAGER committed
385

386
class CompoundManager(models.Manager):
    """
    CompoundManager adds automatically a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        Return the default queryset extended with computed annotations:
        publication and test counts, best activity, LE/LLE, per-criterion
        and global Lipinsky/Veber/Pfizer flags, modulation roles and
        availability flags for the various kinds of tests.

        The annotations are order-dependent: several of them (for instance
        lipinsky_score, lipinsky, veber_hba_hbd, veber) refer to annotations
        added earlier in the chain.
        """
        # @formatter:off
        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity (highest activity value among the results)
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # with LE (presumably ligand efficiency -- TODO confirm): 1.37 * best activity / number of non-hydrogen atoms
        qs = qs.annotate(le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # with LLE: best activity - AlogP
        qs = qs.annotate(lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky global: the four flags above are cast to integers and summed,
        # the compound passes when at least 3 of the 4 criteria are met
        qs = qs.annotate(lipinsky_score=Cast(F('lipinsky_mw'), IntegerField())+Cast(F('lipinsky_hba'), IntegerField())+ \
            Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()))
        qs = qs.annotate(lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField()))
        # Veber hba_hbd (<=12), hba_hbd being the sum of acceptors and donors
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=Case(When(hba_hbd__lte=12, then=True), default=False, output_field=BooleanField()))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=Case(When(tpsa__lte=140, then=True), default=False, output_field=BooleanField()))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=Case(When(nb_rotatable_bonds__lte=10, then=True), default=False, output_field=BooleanField()))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa))
        # the thresholds are repeated in the condition below instead of
        # combining the three flags above (disabled variant kept for reference)
        #qs = qs.annotate(veber=F('veber_rb').bitand(F('veber_hba_hbd').bitor(F('veber_tpsa'))))
        qs = qs.annotate(veber=Case(When(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))), then=True), default=False, output_field=BooleanField()))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=Case(When(a_log_p__lte=3, then=True), default=False, output_field=BooleanField()))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=Case(When(tpsa__gte=75, then=True), default=False, output_field=BooleanField()))
        # Pfizer global (AlogP and TPSA)
        # same approach as for Veber: thresholds repeated in the condition
        # (disabled flag-combining variant kept for reference)
        #qs = qs.annotate(pfizer=F('pfizer_a_log_p').bitand(F('pfizer_tpsa')))
        qs = qs.annotate(pfizer=Case(When(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)), then=True), default=False, output_field=BooleanField()))
        # PDB ligand available
        # "availability" flags below all follow the same pattern: each related
        # row yields 1 or 0, Max() keeps 1 if any row matched, and the result
        # is cast to a boolean
        qs = qs.annotate(pdb_ligand_av=Cast(Max(Case(When(compoundaction__ligand_id__isnull=False, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition role
        # NOTE(review): the three role flags are not aggregated over the
        # related activity results, unlike the availability flags -- confirm
        # this is intended
        qs = qs.annotate(inhibition_role=Case(When(compoundactivityresult__modulation_type='I', then=True), default=False, output_field=BooleanField()))
        # binding role
        qs = qs.annotate(binding_role=Case(When(compoundactivityresult__modulation_type='B', then=True), default=False, output_field=BooleanField()))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=Case(When(compoundactivityresult__modulation_type='S', then=True), default=False, output_field=BooleanField()))
        # cellular tests performed
        qs = qs.annotate(celltest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_type='CELL', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='I', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='S', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # binding tests performed
        qs = qs.annotate(bindtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='B', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # pharmacokinetic tests performed
        qs = qs.annotate(pktest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__pharmacokinetic=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # cytotoxicity tests performed
        qs = qs.annotate(cytoxtest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__cytotox=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # in silico study performed
        qs = qs.annotate(insilico_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__in_silico=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
459

460
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
461
462
463
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
464
    objects = CompoundManager()
465
    canonical_smile = models.TextField(
466
467
468
        verbose_name='Canonical Smile',
        unique=True,
    )
469
470
471
472
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
473
    aromatic_ratio = models.DecimalField(
474
475
476
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
477
478
        blank=True,
        null=True,
479
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
480
    balaban_index = models.DecimalField(
481
482
483
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
484
485
        blank=True,
        null=True,
486
487
488
489
490
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
491
492
        blank=True,
        null=True,
493
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
494
    gc_molar_refractivity = models.DecimalField(
495
496
497
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
498
499
        blank=True,
        null=True,
500
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
501
    log_d = models.DecimalField(
502
503
504
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
505
506
        blank=True,
        null=True,
507
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
508
    a_log_p = models.DecimalField(
509
510
511
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
512
513
        blank=True,
        null=True,
514
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
515
    mean_atom_vol_vdw = models.DecimalField(
516
517
518
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
519
520
        blank=True,
        null=True,
521
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
522
    molecular_weight = models.DecimalField(
523
524
525
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
526
527
        blank=True,
        null=True,
528
529
530
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
531
532
        blank=True,
        null=True,
533
534
535
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
536
537
        blank=True,
        null=True,
538
539
540
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
541
542
        blank=True,
        null=True,
543
544
545
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
546
547
        blank=True,
        null=True,
548
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
549
    nb_aromatic_sssr = models.IntegerField(
550
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
551
552
        blank=True,
        null=True,
553
554
555
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
556
557
        blank=True,
        null=True,
558
559
560
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
561
562
        blank=True,
        null=True,
563
564
565
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
566
567
        blank=True,
        null=True,
568
569
570
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
571
572
        blank=True,
        null=True,
573
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
574
    nb_bonds_non_h = models.IntegerField(
575
        verbose_name='Number of bonds not involving a hydrogen',
576
577
        blank=True,
        null=True,
578
579
580
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
581
582
        blank=True,
        null=True,
583
584
585
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
586
587
        blank=True,
        null=True,
588
589
590
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
591
592
        blank=True,
        null=True,
593
594
595
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
596
597
        blank=True,
        null=True,
598
599
600
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
601
602
        blank=True,
        null=True,
603
604
605
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
606
607
        blank=True,
        null=True,
608
609
610
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
611
612
        blank=True,
        null=True,
613
614
615
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
616
617
        blank=True,
        null=True,
618
619
620
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
621
622
        blank=True,
        null=True,
623
624
625
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
626
627
        blank=True,
        null=True,
628
629
630
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
631
632
        blank=True,
        null=True,
633
634
635
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
636
637
        blank=True,
        null=True,
638
639
640
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
641
642
        blank=True,
        null=True,
643
644
645
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
646
647
        blank=True,
        null=True,
648
649
650
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
651
652
        blank=True,
        null=True,
653
654
655
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
656
657
        blank=True,
        null=True,
658
659
660
    )
    inchi = models.TextField(
        verbose_name='InChi',
661
662
        blank=True,
        null=True,
663
664
665
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
666
667
        blank=True,
        null=True,
668
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
669
    # Molecular descriptors, all optional (blank/NULL allowed).
    # For the DecimalField ones, max_digits is the total number of digits
    # stored and decimal_places the number of them after the decimal point,
    # e.g. max_digits=4 / decimal_places=2 holds values up to 99.99.
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    # max_digits=3 / decimal_places=2: a single digit before the decimal point
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
        blank=True,
        null=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
723
    # Names of the compound and its identifiers in external resources.
    # All of them are optional character fields (blank/NULL allowed).

    # unique=True: two compounds cannot share the same common name
    common_name = models.CharField(
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
    # unique=True: two compounds cannot share the same Chemspider ID
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
755

756
    class Meta:
        # default ordering applied to querysets of this model: ascending id
        ordering = ['id']
758

759
760
761
    def compute_drugbank_compound_similarity(self):
        """
        Compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound, and the 15 best scores
        are stored as DrugbankCompoundTanimoto rows, inserted by decreasing
        similarity.
        """
        # presumably needed so that the compound has an id the new rows can
        # point to -- confirm against callers before moving this call
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all DrugBank compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 highest similarities only
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. store them; the DrugBank compound is referenced through the raw
        # foreign key value, which avoids one SELECT per stored row
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
777
778
779
    @property
    def biblio_refs(self):
        """
        RefCompoundBiblio entries whose compound is this compound
        (returned as a queryset)
        """
        references = RefCompoundBiblio.objects.filter(compound=self)
        return references
783

784
785
786
787
788
789
790
791
792
793
794
795
796
    @property
    def pfam_ids(self):
        """
        set of the PFAM ids of the domains of the complexes bound in the
        PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # the result is not used here; the call is kept as it was
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

797
    @property
    def compound_action_ligand_ids(self):
        """
        set of the ligand_id values of the compound actions of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
806

807
808
    @property
    def best_pXC50_activity(self):
        """
        highest activity value among the activity results of this compound,
        as computed by a Max aggregation over the related results
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
810
811
812
813
814
815

    @property
    def best_pXC50_compound_activity_result(self):
        """
        first activity result whose activity equals the best pXC50 activity
        of this compound, None if there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
817

818
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        name of the PPI tested in the activity result holding the best
        pXC50 activity, None if there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        return None if car is None else car.test_activity_description.ppi.name

829
830
831
832
833
834
835
836
837
838
839
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        name of the family of the PPI tested in the activity result holding
        the best pXC50 activity, None if there is no such result
        """
        car = self.best_pXC50_compound_activity_result
        if car is not None:
            tested_ppi = car.test_activity_description.ppi
            return tested_ppi.family.name
        return None

840
841
842
843
844
845
846
847
848
849
850
851
852
853
    @property
    def bioch_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the 'BIOCH' test type
        """
        lookup = {'test_activity_description__test_type': 'BIOCH'}
        results = self.compoundactivityresult_set.all()
        return results.filter(**lookup).count()

    @property
    def cell_tests_count(self):
        """
        number of activity results of this compound whose test activity
        description has the 'CELL' test type
        """
        lookup = {'test_activity_description__test_type': 'CELL'}
        results = self.compoundactivityresult_set.all()
        return results.filter(**lookup).count()

854
855
856
857
858
859
    @property
    def families(self):
        """
        list of the distinct families of the PPIs targeted by the compound
        actions of this compound (no guaranteed order)
        """
        distinct = set()
        for action in self.compoundaction_set.all():
            distinct.add(action.ppi.family)
        return list(distinct)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
860

861
862
863
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugbankCompoundTanimoto entries of this compound, ordered by
        decreasing tanimoto value
        """
        similarities = self.drugbankcompoundtanimoto_set
        return similarities.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
864

865
866
867
    def autofill(self):
        """
        Fill the computed data of the compound: InChi and InChiKey are
        derived from the canonical SMILES, then the similarities to the
        DrugBank compounds are computed and stored
        """
        smiles = self.canonical_smile
        self.inchi = smi2inchi(smiles)
        self.inchikey = smi2inchikey(smiles)
        # must come last: this call saves the compound, InChi/InChiKey included
        self.compute_drugbank_compound_similarity()
870

871
872
873
    def __str__(self):
        """ label of the compound, built from its id """
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
874

875
876
877
class CompoundTanimoto(models.Model):
    """
    Tanimoto similarity between a query SMILES and a compound,
    for a given fingerprint
    """
    canonical_smiles = models.TextField(verbose_name='Canonical Smile')
    fingerprint = models.TextField(verbose_name='Fingerprint')
    compound = models.ForeignKey(Compound, on_delete=models.CASCADE)
    tanimoto = models.DecimalField(
        verbose_name='Tanimoto value',
        max_digits=5,
        decimal_places=4,
    )

    class Meta:
        # a single set of fields: one row per (query SMILES, fingerprint, compound)
        unique_together = ('canonical_smiles', 'fingerprint', 'compound')
886

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
887

888
def create_tanimoto(smiles_query, fingerprint):
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto

    Nothing is computed nor inserted when CompoundTanimoto already holds
    results for this SMILES and this fingerprint.

    :param smiles_query: SMILES to compare to the canonical SMILES of the compounds
    :param fingerprint: fingerprint identifier, handed over to FingerPrinter
    """
    # exists() only asks the database whether a row matches, no need to count them
    already_computed = CompoundTanimoto.objects.filter(
        canonical_smiles=smiles_query, fingerprint=fingerprint).exists()
    if already_computed:
        return
    smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
    fingerprinter = FingerPrinter(fingerprint)
    # 1. compute tanimoto for SMILES query vs all compounds
    tanimoto_dict = fingerprinter.tanimoto_smiles(smiles_query, smiles_dict)
    # 2. insert results in a table with three fields: SMILES query, compound id, tanimoto index
    # the compound is referenced through the raw foreign key value, which
    # avoids one SELECT per compound
    cts = [
        CompoundTanimoto(
            canonical_smiles=smiles_query,
            fingerprint=fingerprint,
            compound_id=id_,
            tanimoto=tanimoto_dict[id_],
        )
        for id_ in smiles_dict
    ]
    CompoundTanimoto.objects.bulk_create(cts)
904

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
905

906
class PcaBiplotData(models.Model):
    """
    Storage of the PCA biplot
    the whole dataset is kept as a single JSON text, in a single row
    """
    pca_biplot_data = models.TextField(
        verbose_name='PCA biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
913

914

915
class LeLleBiplotData(models.Model):
    """
    Storage of the LE-LLE biplot
    the whole dataset is kept as a single JSON text, in a single row
    """
    le_lle_biplot_data = models.TextField(
        verbose_name='LE-LLE biplot JSON data',
        null=True,
        blank=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
922

923

924
class CellLine(models.Model):
    """
    Cell line, identified by a unique name
    """
    name = models.CharField(
        verbose_name='Name',
        unique=True,
        max_length=50,
    )

    def __str__(self):
        """ a cell line is displayed with its name """
        return self.name

Hervé  MENAGER's avatar
Hervé MENAGER committed
933

934
class TestActivityDescription(models.Model):