models.py 41.1 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.core.exceptions import ValidationError
12
from django.db import models, transaction
13
14
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
15
from django.db.models.functions import Cast
16
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
17

18
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
19
20
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
21

Hervé  MENAGER's avatar
Hervé MENAGER committed
22

23
24
25
26
27
28
29
30
31
32
33
34
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be populated from external
    sources.

    Subclasses implement autofill() to fetch and assign the data, and may
    override is_autofill_done() to report whether that data is already
    present. save() runs autofill() first when it is called with
    autofill=True, or when is_autofill_done() returns False.
    """

    class Meta:
        abstract = True

    def is_autofill_done(self):
        """
        Tell whether the autofilled fields are already set (True by default)
        """
        return True

    def autofill(self):
        """
        Populate the fields from the external source; must be provided by
        subclasses
        """
        raise NotImplementedError()

    def save(self, *args, **kwargs):
        """
        Save the instance, running autofill() beforehand when required.

        The optional 'autofill' keyword is consumed here and is never
        forwarded to the parent save().
        """
        # only the literal True forces a refresh
        forced = kwargs.pop('autofill', None) is True
        if forced or not self.is_autofill_done():
            self.autofill()
        super().save(*args, **kwargs)

47
48

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications, patents or DOI-identified documents)
    """
    # (code, label) pairs for the supported reference types
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # One anchored pattern per source code, used to validate id_source.
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # raw string, and the dot following the "10" prefix is a literal dot
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.CharField('Title', max_length=300)
    journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
    authors_list = models.CharField('Authors list', max_length=500)
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI lookup, depending on the source)

        Raises NotImplementedError when the source code is not one of
        'PM', 'PT' or 'DO'.
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled once it has a non-empty title
        (a title of None counts as not filled)
        """
        return bool(self.title)

    def clean(self):
        """
        Model-level validation: check id_source against the pattern of the
        selected source
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        True for the sources get_external_url() can build a link for
        """
        return self.source == 'PM' or self.source == 'DO'

    def get_external_url(self):
        """
        Return the PubMed or doi.org URL of the reference, or None for any
        other source
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)
        return None

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        Returns True when the identifier matches. Raises ValidationError
        (keyed on 'source') when no pattern is registered for the source, or
        (keyed on 'id_source') when the identifier does not match.
        """
        id_source_validator = Bibliography.id_source_validators.get(source)
        if id_source_validator is None:
            # report an unknown source as a validation problem rather than
            # letting a KeyError escape from clean()
            raise ValidationError(
                dict(
                    source=_("Unknown bibliographic source")
                )
            )
        if not id_source_validator.match(id_source):
            # interpolate after the translation lookup so that the message id
            # is the constant template
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "bibliographies"

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)
129

Hervé  MENAGER's avatar
Hervé MENAGER committed
130

131
class Taxonomy(AutoFillableModel):
    """
    Organism identified by its NCBI Taxonomy ID, stored together with the
    matching human-readable name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        max_digits=9,
        decimal_places=0,
        unique=True,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        set the organism name from the scientific name returned by
        get_taxonomy_info() (external service: NCBI Entrez)
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
153

Hervé  MENAGER's avatar
Hervé MENAGER committed
154

155
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function: the GO identifier
    and its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(verbose_name='Gene Ontology ID', max_length=10, unique=True)
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """
        GO identifier and description separated by a single space
        """
        return ' '.join((self.go_id, self.description))

    def is_autofill_done(self):
        """
        The term is filled once its description is a non-empty value
        """
        if self.description is None:
            return False
        return len(self.description) > 0

    def autofill(self):
        """
        set the description from the label returned by get_go_info()
        (external service: EBI OLS)
        """
        self.description = get_go_info(self.go_id)['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
182

183
class Protein(AutoFillableModel):
    """
    Protein described by its Uniprot record: names, organism,
    molecular functions and domains
    """
    uniprot_id = models.CharField(verbose_name='Uniprot ID', max_length=10, unique=True)
    recommended_name_long = models.CharField(
        verbose_name='Uniprot Recommended Name (long)',
        max_length=75,
    )
    short_name = models.CharField(verbose_name='Short name', max_length=50)
    gene_name = models.CharField(verbose_name='Gene name', max_length=30, unique=True)
    entry_name = models.CharField(verbose_name='Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', on_delete=models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    def __str__(self):
        return '%s (%s)' % (self.uniprot_id, self.recommended_name_long)

    def is_autofill_done(self):
        """
        The protein is filled once it has a non-empty gene name
        """
        return len(self.gene_name) > 0

    @transaction.atomic
    def autofill(self):
        """
        fill the protein from the record returned by get_uniprot_info(),
        creating the Taxonomy, MolecularFunction and Domain rows it refers
        to when they do not exist yet.

        The protein itself is saved in the middle of the process, before the
        many-to-many links are added.
        """
        record = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = record['recommended_name']

        # prefer the gene name flagged "primary"; otherwise use the first one
        candidates = record['gene_names']
        fallback = candidates[0]['name']
        self.gene_name = next(
            (entry["name"] for entry in candidates if entry["type"] == "primary"),
            fallback,
        )

        self.entry_name = record['entry_name']
        self.short_name = record['short_name']

        organism_id = record['organism']
        try:
            organism = Taxonomy.objects.get(taxonomy_id=organism_id)
        except Taxonomy.DoesNotExist:
            organism = Taxonomy(taxonomy_id=organism_id)
            organism.save(autofill=True)
        self.organism = organism
        # bypass AutoFillableModel.save() so autofill is not triggered again
        super(AutoFillableModel, self).save()

        for go_id in record['molecular_functions']:
            function = MolecularFunction.objects.get_or_create(go_id=go_id)[0]
            self.molecular_functions.add(function)

        for pfam_acc in record['domains']:
            pfam_domain = Domain.objects.get_or_create(pfam_acc=pfam_acc)[0]
            self.domains.add(pfam_domain)

Hervé  MENAGER's avatar
Hervé MENAGER committed
241

242
class Domain(AutoFillableModel):
    """
    Protein domain, described by its PFAM record
    """
    pfam_acc = models.CharField(verbose_name='Pfam Accession', unique=True, max_length=10)
    pfam_id = models.CharField(verbose_name='Pfam Family Identifier', max_length=20)
    pfam_description = models.CharField(verbose_name='Pfam Description', max_length=100)
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        default="",
        blank=True,
    )
    # TODO: what is this field? check database contents

    def __str__(self):
        return '%s (%s-%s)' % (self.pfam_acc, self.pfam_id, self.pfam_description)

    @property
    def name(self):
        """
        The Pfam family identifier
        """
        return self.pfam_id

    def is_autofill_done(self):
        """
        The domain is filled once its Pfam identifier is a non-empty value
        """
        if self.pfam_id is None:
            return False
        return len(self.pfam_id) > 0

    def autofill(self):
        """
        set the Pfam identifier and description from the record returned by
        get_pfam_info() (external service: PFAM)
        """
        record = get_pfam_info(self.pfam_acc)
        self.pfam_id = record['id']
        self.pfam_description = record['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
273

274
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey('Domain', on_delete=models.CASCADE)
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def name(self):
        """
        Short name of the associated protein
        """
        return self.protein.short_name

    def __str__(self):
        # built from the raw foreign key values, so no related row is loaded
        return '%s-%s' % (self.protein_id, self.domain_id)
Hervé  MENAGER's avatar
Hervé MENAGER committed
291

292

293
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
303
304


305
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role;
    adds no field of its own
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
312

Hervé  MENAGER's avatar
Hervé MENAGER committed
313

314
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '%s (%s)' % (self.code, self.description)

327
328

class Disease(models.Model):
    """
    Disease, identified by a unique name
    """
    # is there any database/nomenclature for diseases?
    name = models.CharField(verbose_name='Disease', unique=True, max_length=30)

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
336

Hervé  MENAGER's avatar
Hervé MENAGER committed
337
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by a unique name
    """
    name = models.CharField(verbose_name='Name', unique=True, max_length=30)

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
348

Hervé  MENAGER's avatar
Hervé MENAGER committed
349

350
class Ppi(AutoFillableModel):
    """
    PPI (protein-protein interaction)
    """
    pdb_id = models.CharField('PDB ID', max_length=4, null=True, blank=True)
    pockets_nb = models.IntegerField(
        'Total number of pockets in the complex', default=1)
    symmetry = models.ForeignKey(Symmetry, models.CASCADE)
    diseases = models.ManyToManyField(Disease)
    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
    name = models.TextField('PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #{} on {}'.format(self.id, self.name)

    def is_autofill_done(self):
        """
        The PPI is considered filled unless its name is the empty string
        """
        # NOTE(review): a name of None also counts as "done" here, so only an
        # explicit save(autofill=True) recomputes it — confirm this is intended
        return self.name != ""

    def autofill(self):
        """
        recompute the stored name from the proteins of the PPI complexes
        """
        # name is denormalized and stored in the database to reduce SQL queries in query mode
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return bound ppi complexes belonging to this ppi
        """
        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())

    def compute_name_from_protein_names(self):
        """
        Build the PPI name from the short names of its proteins.

        The result is the comma-separated bound protein names, followed by
        ' / ' and the comma-separated partner protein names when there are
        any. Names are de-duplicated and sorted within each group, so that
        the same set of complexes always yields the same name.
        """
        # follow complex -> protein in the same query instead of once per row
        all_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.ppicomplex_set.select_related('complex__protein')
        }
        bound_protein_names = {
            ppi_complex.complex.protein.short_name
            for ppi_complex in self.get_ppi_bound_complexes().select_related('complex__protein')
        }
        # partners are all the proteins that do not appear in a bound complex
        partner_protein_names = all_protein_names - bound_protein_names
        # sets have no stable iteration order: sort before joining
        name = ','.join(sorted(bound_protein_names))
        partner_str = ','.join(sorted(partner_protein_names))
        if partner_str != '':
            name += ' / ' + partner_str
        return name
390

Hervé  MENAGER's avatar
Hervé MENAGER committed
391

Hervé  MENAGER's avatar
Hervé MENAGER committed
392
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    cc_nb = models.IntegerField(default=1, verbose_name=_('cc_nb_verbose_name'))

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI %s, Complex %s (%s)' % (self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
409

410
class CompoundManager(models.Manager):
    """
    CompoundManager automatically adds a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        Return the default queryset extended with the computed annotations.

        The annotations are added in sequence; several of them (lipinsky_score,
        lipinsky, veber_hba_hbd, veber) refer to annotations added earlier in
        the chain, so the order of the statements matters.
        """
        # @formatter:off
        qs = super().get_queryset()
        # number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # best activity (highest activity value among the activity results)
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # LE: 1.37 * best activity / number of non-hydrogen atoms
        qs = qs.annotate(le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # LLE: best activity - ALogP
        qs = qs.annotate(lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky global: number of criteria met (0 to 4), then the rule
        # itself, satisfied when at least 3 of the 4 criteria are met
        qs = qs.annotate(lipinsky_score=Cast(F('lipinsky_mw'), IntegerField())+Cast(F('lipinsky_hba'), IntegerField())+ \
            Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()))
        qs = qs.annotate(lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField()))
        # Veber hba_hbd (<=12): sum of hydrogen bond acceptors and donors
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=Case(When(hba_hbd__lte=12, then=True), default=False, output_field=BooleanField()))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=Case(When(tpsa__lte=140, then=True), default=False, output_field=BooleanField()))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=Case(When(nb_rotatable_bonds__lte=10, then=True), default=False, output_field=BooleanField()))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa)); the conditions
        # are restated on the underlying fields instead of combining the
        # veber_* annotations (previous attempt kept below for reference)
        #qs = qs.annotate(veber=F('veber_rb').bitand(F('veber_hba_hbd').bitor(F('veber_tpsa'))))
        qs = qs.annotate(veber=Case(When(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))), then=True), default=False, output_field=BooleanField()))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=Case(When(a_log_p__lte=3, then=True), default=False, output_field=BooleanField()))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=Case(When(tpsa__gte=75, then=True), default=False, output_field=BooleanField()))
        # Pfizer global (AlogP and TPSA); conditions restated on the
        # underlying fields (previous attempt kept below for reference)
        #qs = qs.annotate(pfizer=F('pfizer_a_log_p').bitand(F('pfizer_tpsa')))
        qs = qs.annotate(pfizer=Case(When(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)), then=True), default=False, output_field=BooleanField()))
        # PDB ligand available: true when at least one compound action has a
        # ligand id (0/1 flag per row, Max over the rows, cast to boolean)
        qs = qs.annotate(pdb_ligand_av=Cast(Max(Case(When(compoundaction__ligand_id__isnull=False, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # NOTE(review): unlike the *_av flags, the three role annotations
        # below are not aggregated with Max() over the activity results —
        # confirm that evaluating them per joined row is intended
        # inhibition role
        qs = qs.annotate(inhibition_role=Case(When(compoundactivityresult__modulation_type='I', then=True), default=False, output_field=BooleanField()))
        # binding role
        qs = qs.annotate(binding_role=Case(When(compoundactivityresult__modulation_type='B', then=True), default=False, output_field=BooleanField()))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=Case(When(compoundactivityresult__modulation_type='S', then=True), default=False, output_field=BooleanField()))
        # cellular tests performed
        qs = qs.annotate(celltest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_type='CELL', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='I', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='S', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # binding tests performed
        qs = qs.annotate(bindtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='B', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # pharmacokinetic tests performed
        qs = qs.annotate(pktest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__pharmacokinetic=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # cytotoxicity tests performed
        qs = qs.annotate(cytoxtest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__cytotox=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # in silico study performed
        qs = qs.annotate(insilico_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__in_silico=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        #@formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
483

484
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
485
486
487
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
488
    objects = CompoundManager()
489
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
490
        verbose_name='Canonical Smiles',
491
492
        unique=True,
    )
493
494
495
496
    is_macrocycle = models.BooleanField(
        verbose_name= _('is_macrocycle_verbose_name'),
        help_text= _('is_macrocycle_help_text'),
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
497
    aromatic_ratio = models.DecimalField(
498
499
500
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
501
502
        blank=True,
        null=True,
503
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
504
    balaban_index = models.DecimalField(
505
506
507
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
508
509
        blank=True,
        null=True,
510
511
512
513
514
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
515
516
        blank=True,
        null=True,
517
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
518
    gc_molar_refractivity = models.DecimalField(
519
520
521
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
522
523
        blank=True,
        null=True,
524
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
525
    log_d = models.DecimalField(
526
527
528
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
529
530
        blank=True,
        null=True,
531
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
532
    a_log_p = models.DecimalField(
533
534
535
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
536
537
        blank=True,
        null=True,
538
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
539
    mean_atom_vol_vdw = models.DecimalField(
540
541
542
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
543
544
        blank=True,
        null=True,
545
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
546
    molecular_weight = models.DecimalField(
547
548
549
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
550
551
        blank=True,
        null=True,
552
553
554
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
555
556
        blank=True,
        null=True,
557
558
559
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
560
561
        blank=True,
        null=True,
562
563
564
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
565
566
        blank=True,
        null=True,
567
568
569
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
570
571
        blank=True,
        null=True,
572
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
573
    nb_aromatic_sssr = models.IntegerField(
574
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
575
576
        blank=True,
        null=True,
577
578
579
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
580
581
        blank=True,
        null=True,
582
583
584
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
585
586
        blank=True,
        null=True,
587
588
589
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
590
591
        blank=True,
        null=True,
592
593
594
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
595
596
        blank=True,
        null=True,
597
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
598
    nb_bonds_non_h = models.IntegerField(
599
        verbose_name='Number of bonds not involving a hydrogen',
600
601
        blank=True,
        null=True,
602
603
604
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
605
606
        blank=True,
        null=True,
607
608
609
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
610
611
        blank=True,
        null=True,
612
613
614
    )
    # Integer atom, bond and ring counts of the compound.
    # All of them are optional (blank=True, null=True).
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
        blank=True,
        null=True,
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
        blank=True,
        null=True,
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
        blank=True,
        null=True,
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
        blank=True,
        null=True,
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
        blank=True,
        null=True,
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
        blank=True,
        null=True,
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
        blank=True,
        null=True,
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
        blank=True,
        null=True,
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
        blank=True,
        null=True,
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
        blank=True,
        null=True,
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
        blank=True,
        null=True,
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
        blank=True,
        null=True,
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
        blank=True,
        null=True,
    )
    # InChi / InChiKey of the compound; both are optional in the schema and
    # are (re)computed from canonical_smile by autofill().
    inchi = models.TextField(
        verbose_name='InChi',
        blank=True,
        null=True,
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
693
    # Optional numeric descriptors. For the DecimalFields, max_digits is the
    # total number of digits stored and decimal_places the part after the
    # decimal point (e.g. max_digits=4, decimal_places=2 holds up to 99.99).
    randic_index = models.DecimalField(
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rdf070m = models.DecimalField(
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    rotatable_bond_fraction = models.DecimalField(
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_polar = models.DecimalField(
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    sum_atom_vol_vdw = models.DecimalField(
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
        blank=True,
        null=True,
    )
    tpsa = models.DecimalField(
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
        blank=True,
        null=True,
    )
    ui = models.DecimalField(
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
        blank=True,
        null=True,
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
747
    # Names and external database identifiers, all optional.
    # common_name and chemspider_id are additionally declared unique; they
    # are nullable, so several compounds can leave them unset (NULL).
    common_name = models.CharField(
        verbose_name='Common name',
        unique=True,
        max_length=20,
        blank=True,
        null=True,
    )
    pubchem_id = models.CharField(
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
    chemspider_id = models.CharField(
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
    chembl_id = models.CharField(
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
    iupac_name = models.CharField(
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
779

780
    class Meta:
        # default ordering of querysets on this model: by the `id` field
        ordering = ['id']
782

783
784
785
    def compute_drugbank_compound_similarity(self):
        """
        Compute the Tanimoto similarity to the existing DrugBank compounds and
        store the 15 best matches as DrugbankCompoundTanimoto rows.

        The compound is saved first. The fingerprint type is read from the
        DRUGBANK_FINGERPRINTS setting and defaults to "FP2".
        """
        # NOTE(review): presumably saved up front so that the rows created
        # below can reference a persisted compound -- confirm.
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 highest scores, best first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. insert them in one statement; each DrugBank compound is
        # referenced through its id (already known from step 1) rather than
        # fetched again with one query per match
        DrugbankCompoundTanimoto.objects.bulk_create([
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=id_,
                tanimoto=tanimoto,
            )
            for id_, tanimoto in best_matches
        ])

Hervé  MENAGER's avatar
Hervé MENAGER committed
801
802
803
    @property
    def biblio_refs(self):
        """
        QuerySet of the RefCompoundBiblio entries whose compound is this one
        """
        linked_refs = RefCompoundBiblio.objects.filter(compound=self)
        return linked_refs
807

808
809
810
811
812
813
814
815
816
817
818
819
820
    @property
    def pfam_ids(self):
        """
        Set of the PFAM ids of the domains of the bound complexes of every
        PPI targeted by a compound action of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # called as in the original code; its return value is not used
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

821
    @property
    def compound_action_ligand_ids(self):
        """
        return the set of ligand ids (`ligand_id`) of the compound actions
        of this compound
        """
        return {ca.ligand_id for ca in self.compoundaction_set.all()}
830

831
832
    @property
    def best_pXC50_activity(self):
        """
        Highest `activity` value among the compound activity results of this
        compound (the aggregate yields None when there is no value)
        """
        return self.compoundactivityresult_set.aggregate(Max('activity'))['activity__max']
834
835
836
837
838
839

    @property
    def best_pXC50_compound_activity_result(self):
        """
        Compound activity result carrying the best pXC50 activity, or None
        when this compound has no activity value
        """
        best_pXC50_activity = self.best_pXC50_activity
        if best_pXC50_activity is None:
            return None
        # first() instead of [0]: it returns None rather than raising
        # IndexError if no row matches any more, and on an unordered queryset
        # it orders by primary key, so ties resolve deterministically
        return self.compoundactivityresult_set.filter(activity=best_pXC50_activity).first()
841

842
    @property
    def best_pXC50_activity_ppi_name(self):
        """
        Name of the PPI corresponding to the best PXC50 activity,
        or None when the compound has no best activity result
        """
        best_activity_car = self.best_pXC50_compound_activity_result
        if best_activity_car is None:
            return None
        return best_activity_car.test_activity_description.ppi.name

853
854
855
856
857
858
859
860
861
862
863
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        Name of the family of the PPI tied to the best PXC50 activity
        (None when there is no best activity result)
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            target_ppi = top_result.test_activity_description.ppi
            return target_ppi.family.name
        return None

864
865
866
867
868
869
870
871
872
873
874
875
876
877
    @property
    def bioch_tests_count(self):
        """
        Number of activity results of this compound whose test activity
        description has the 'BIOCH' test type
        """
        all_results = self.compoundactivityresult_set.all()
        biochemical = all_results.filter(test_activity_description__test_type='BIOCH')
        return biochemical.count()

    @property
    def cell_tests_count(self):
        """
        Number of activity results of this compound whose test activity
        description has the 'CELL' test type
        """
        all_results = self.compoundactivityresult_set.all()
        cellular = all_results.filter(test_activity_description__test_type='CELL')
        return cellular.count()

878
879
880
881
882
883
    @property
    def families(self):
        """
        List of the distinct PPI families of the PPIs targeted by the
        compound actions of this compound
        """
        distinct_families = {action.ppi.family for action in self.compoundaction_set.all()}
        return list(distinct_families)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
884

885
886
887
    @property
    def sorted_similar_drugbank_compounds(self):
        """
        DrugbankCompoundTanimoto entries of this compound, ordered by
        decreasing tanimoto value
        """
        similarity_rows = self.drugbankcompoundtanimoto_set
        return similarity_rows.order_by('-tanimoto')
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
888

889
890
891
    def autofill(self):
        """
        Fill the derived data of the compound: InChi and InChiKey computed
        from canonical_smile, then the DrugBank similarity entries.
        """
        # compute InChi and InChiKey
        self.inchi = smi2inchi(self.canonical_smile)
        self.inchikey = smi2inchikey(self.canonical_smile)
        # runs last: it saves the compound itself, so the values set above
        # are part of that save
        self.compute_drugbank_compound_similarity()
894

895
896
897
    def __str__(self):
        """Human-readable label built from the compound id"""
        label = 'Compound #{}'
        return label.format(self.id)

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
898

899
900
901
class CompoundTanimoto(models.Model):
    """
    Tanimoto value stored for a query SMILES, a fingerprint and a compound
    """
    # SMILES the compound is compared to
    canonical_smiles = models.TextField(
        'Canonical Smile')
    fingerprint = models.TextField('Fingerprint')
    # rows are deleted together with their compound
    compound = models.ForeignKey(Compound, models.CASCADE)
    tanimoto = models.DecimalField(
        'Tanimoto value', max_digits=5, decimal_places=4)

    class Meta:
        # a single uniqueness constraint over the three columns, spelled as
        # an explicit tuple of tuples (the former extra parentheses did not
        # create a nested tuple)
        unique_together = (
            ('canonical_smiles', 'fingerprint', 'compound'),
        )
910

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
911

912
def create_tanimoto(smiles_query, fingerprint):
Hervé  MENAGER's avatar
Hervé MENAGER committed
913
914
915
916
    """
    Compute the Tanimoto similarity between a given SMILES and the compounds
    then insert the results in CompoundTanimoto
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
917
918
    if CompoundTanimoto.objects.filter(canonical_smiles=smiles_query, fingerprint=fingerprint).count() == 0:
        smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
919
        fingerprinter = FingerPrinter(fingerprint)
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
920
        # 1. compute tanimoto for SMILES query vs all compounds