models.py 43.1 KB
Newer Older
1
2
3
4
"""
Models used in iPPI-DB
"""

Hervé  MENAGER's avatar
Hervé MENAGER committed
5
from __future__ import unicode_literals
6

7
import operator
8
import re
Hervé  MENAGER's avatar
Hervé MENAGER committed
9

10
from django.conf import settings
11
from django.contrib.auth import get_user_model
12
from django.core.exceptions import ValidationError
13
from django.db import models, transaction
14
15
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
16
from django.db.models.functions import Cast
17
from django.urls import reverse
18
from django.utils.translation import ugettext_lazy as _
Hervé  MENAGER's avatar
Hervé MENAGER committed
19

20
from .utils import FingerPrinter, smi2inchi, smi2inchikey
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
21
22
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
    get_pfam_info, get_doi_info
23

Hervé  MENAGER's avatar
Hervé MENAGER committed
24

25
26
27
28
29
30
31
32
33
34
35
36
class AutoFillableModel(models.Model):
    """
    Abstract base for models whose fields can be filled automatically from
    external sources.

    Subclasses implement autofill(). save() accepts an extra ``autofill``
    keyword: when it is exactly True, or when is_autofill_done() reports
    False, autofill() runs before the regular save and autofill_post_save()
    runs right after it.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Save the instance, running the autofill hooks when required.

        The ``autofill`` keyword is consumed here and never forwarded to
        Django's own save().
        """
        pending = not self.is_autofill_done()
        forced = kwargs.pop('autofill', None) is True
        run_autofill = forced or pending
        if run_autofill:
            self.autofill()
        super().save(*args, **kwargs)
        if run_autofill:
            self.autofill_post_save()

    def autofill(self):
        """
        Fill the instance fields; must be provided by concrete subclasses.
        """
        raise NotImplementedError()

    def autofill_post_save(self):
        """
        Hook called once the save is done, useful for setting m2m relations.
        The default implementation does nothing.
        :return:
        """

    def is_autofill_done(self):
        """
        Tell whether the autofilled fields are already populated.
        The default answer is True, so autofill only runs on request.
        """
        return True

60
61

class Bibliography(AutoFillableModel):
    """
    Bibliography references
    (publications or patents)

    A reference is identified by a source type (``source``) and an
    identifier within that source (``id_source``); title, journal, authors
    and year are fetched from the matching external service by autofill().
    """
    SOURCES = (
        ('PM', 'PubMed ID'),
        ('PT', 'Patent'),
        ('DO', 'DOI')
    )
    # One compiled pattern per source code; id_source must match the pattern
    # registered for the selected source (see validate_source_id).
    id_source_validators = dict(
        PM=re.compile(r"^[0-9]+$"),
        PT=re.compile(r"^.*$"),
        # Raw string so that \d is a regex class rather than an invalid string
        # escape, and an escaped dot so that only the literal "10." prefix
        # is accepted.
        DO=re.compile(r"^10\.\d{4,9}/.+$"),
    )
    source = models.CharField(
        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
    id_source = models.CharField('Bibliographic ID', max_length=25)
    title = models.TextField('Title')
    journal_name = models.TextField('Journal name', null=True, blank=True)
    authors_list = models.TextField('Authors list')
    biblio_year = models.PositiveSmallIntegerField('Year')
    cytotox = models.BooleanField('Cytotoxicity data', default=False)
    in_silico = models.BooleanField('in silico study', default=False)
    in_vitro = models.BooleanField('in vitro study', default=False)
    in_vivo = models.BooleanField('in vivo study', default=False)
    in_cellulo = models.BooleanField('in cellulo study', default=False)
    pharmacokinetic = models.BooleanField(
        'pharmacokinetic study', default=False)
    xray = models.BooleanField('X-Ray data', default=False)

    def autofill(self):
        """
        fetch information from external services
        (Pubmed, Google patents or DOI)

        :raises NotImplementedError: when ``source`` is not one of the
            handled source codes
        """
        if self.source == 'PM':
            info = get_pubmed_info(self.id_source)
        elif self.source == 'PT':
            info = get_google_patent_info(self.id_source)
        elif self.source == 'DO':
            info = get_doi_info(self.id_source)
        else:
            raise NotImplementedError()
        self.title = info['title']
        self.journal_name = info['journal_name']
        self.authors_list = info['authors_list']
        self.biblio_year = info['biblio_year']

    def is_autofill_done(self):
        """
        The reference is considered filled once it has a non-empty title.
        A missing (None) title counts as not filled instead of raising.
        """
        return bool(self.title)

    def clean(self):
        """
        Model-level validation: id_source must match the selected source.
        """
        super().clean()
        Bibliography.validate_source_id(self.id_source, self.source)

    def has_external_url(self):
        """
        Tell whether get_external_url() can build a link for this source.
        """
        return self.source == 'PM' or self.source == 'DO'

    def get_external_url(self):
        """
        Return the PubMed or doi.org URL of the reference, or None for
        sources without an external URL.
        """
        if self.source == 'PM':
            return "https://www.ncbi.nlm.nih.gov/pubmed/" + str(self.id_source)
        if self.source == 'DO':
            return "https://doi.org/" + str(self.id_source)

    @staticmethod
    def validate_source_id(id_source, source):
        """
        Check id_source against the pattern registered for source.

        :param id_source: identifier to check
        :param source: source code, a key of id_source_validators
        :return: True when the identifier matches
        :raises ValidationError: keyed on ``id_source`` when it does not match
        :raises KeyError: when source has no registered pattern
        """
        id_source_validator = Bibliography.id_source_validators[source]
        if not id_source_validator.match(id_source):
            # Interpolate AFTER the translation lookup: the msgid must be the
            # constant template, otherwise it can never be found in a catalog.
            raise ValidationError(
                dict(
                    id_source=_("Must match pattern %s for this selected source") % id_source_validator.pattern
                )
            )
        return True

    class Meta:
        verbose_name_plural = "Bibliographies"
        verbose_name = "Bibliography"

    def data_and_study(self):
        """
        Return a comma-separated list of the title-cased verbose names of the
        data/study flags that are set on this reference.
        """
        ret = []
        for f in [
            "cytotox",
            "xray",
            "in_silico",
            "in_vitro",
            "in_cellulo",
            "in_vivo",
            "pharmacokinetic",
        ]:
            if getattr(self, f, False):
                ret.append(self._meta.get_field(f).verbose_name.title())
        return ", ".join(ret)

    def __str__(self):
        return '{}, {}'.format(self.source, self.id_source)

    def get_absolute_url(self):
        """
        URL of the detail view of this reference ('biblio-view').
        """
        return reverse('biblio-view', kwargs={'biblio_pk': self.pk})

Hervé  MENAGER's avatar
Hervé MENAGER committed
162

163
class Taxonomy(AutoFillableModel):
    """
    NCBI Taxonomy entry: a taxonomy ID
    together with its human-readable organism name
    """
    taxonomy_id = models.DecimalField(
        verbose_name='NCBI TaxID',
        unique=True,
        max_digits=9,
        decimal_places=0,
    )
    name = models.CharField(verbose_name='Organism name', max_length=200)

    class Meta:
        verbose_name_plural = "taxonomies"

    def __str__(self):
        return self.name

    def autofill(self):
        """
        Set the organism name from the 'scientific_name' entry returned by
        get_taxonomy_info() for this taxonomy ID
        """
        self.name = get_taxonomy_info(self.taxonomy_id)['scientific_name']
Hervé  MENAGER's avatar
Hervé MENAGER committed
185

Hervé  MENAGER's avatar
Hervé MENAGER committed
186

187
class MolecularFunction(AutoFillableModel):
    """
    Gene Ontology molecular function: a GO identifier
    together with its human-readable description
    """
    # GO term id format: 'GO:0000000'
    go_id = models.CharField(
        verbose_name='Gene Ontology ID',
        unique=True,
        max_length=10,
    )
    description = models.CharField(verbose_name='description', max_length=500)

    def __str__(self):
        return self.description

    @property
    def name(self):
        """GO identifier and description separated by a single space"""
        return ' '.join((self.go_id, self.description))

    def is_autofill_done(self):
        """Filled once the description is neither None nor empty"""
        return bool(self.description)

    def autofill(self):
        """
        Set the description from the 'label' entry returned by
        get_go_info() for this GO identifier
        """
        self.description = get_go_info(self.go_id)['label']

Hervé  MENAGER's avatar
Hervé MENAGER committed
214

215
class Protein(AutoFillableModel):
    """
    Protein information (from Uniprot)
    and the corresponding human-readable name
    """
    uniprot_id = models.CharField('Uniprot ID', unique=True, max_length=10)
    recommended_name_long = models.CharField(
        'Uniprot Recommended Name (long)', max_length=75)
    short_name = models.CharField('Short name', max_length=50)
    gene_name = models.CharField('Gene name', unique=True, max_length=30)
    entry_name = models.CharField('Entry name', max_length=30)
    organism = models.ForeignKey('Taxonomy', models.CASCADE)
    molecular_functions = models.ManyToManyField(MolecularFunction)
    domains = models.ManyToManyField('Domain')

    @transaction.atomic
    def autofill(self):
        """
        fetch information from external services
        (Uniprot) and create Taxonomy/Molecular Functions if needed

        The fetched data is kept on the instance so that
        autofill_post_save() can set the many-to-many relations once the
        protein has been saved.
        """
        info = get_uniprot_info(self.uniprot_id)
        self.recommended_name_long = info['recommended_name']

        gene_names = info['gene_names']
        # fall back on the first listed name...
        self.gene_name = gene_names[0]['name']
        # ...but prefer the primary one when there is one
        for gene_name in gene_names:
            if gene_name["type"] == "primary":
                self.gene_name = gene_name["name"]
                break

        self.entry_name = info['entry_name']
        self.short_name = info['short_name']
        try:
            taxonomy = Taxonomy.objects.get(taxonomy_id=info['organism'])
        except Taxonomy.DoesNotExist:
            # unknown organism: create it and let it fetch its own name
            taxonomy = Taxonomy()
            taxonomy.taxonomy_id = info['organism']
            taxonomy.save(autofill=True)
        self.organism = taxonomy
        # handed over to autofill_post_save(); m2m relations cannot be set
        # before the instance is saved
        self.__info = info

    def autofill_post_save(self):
        """
        Attach the molecular functions and domains listed in the data fetched
        by autofill(), creating the related objects when they do not exist.
        """
        info = self.__info
        for go_id in info['molecular_functions']:
            mol_function, created = MolecularFunction.objects.get_or_create(go_id=go_id)
            self.molecular_functions.add(mol_function)

        for domain_id in info['domains']:
            domain, created = Domain.objects.get_or_create(pfam_acc=domain_id)
            self.domains.add(domain)

    def is_autofill_done(self):
        """
        Filled once the gene name is neither None nor empty (None-safe, like
        the equivalent checks of MolecularFunction and Domain).
        """
        return bool(self.gene_name)

    def __str__(self):
        return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)

Hervé  MENAGER's avatar
Hervé MENAGER committed
275

276
class Domain(AutoFillableModel):
    """
    Protein domain, described with PFAM information
    """
    pfam_acc = models.CharField(
        verbose_name='Pfam Accession',
        max_length=10,
        unique=True,
    )
    pfam_id = models.CharField(
        verbose_name='Pfam Family Identifier',
        max_length=20,
    )
    pfam_description = models.CharField(
        verbose_name='Pfam Description',
        max_length=100,
    )
    domain_family = models.CharField(
        verbose_name='Domain family',
        max_length=25,
        blank=True,
        default="",
    )

    # TODO: what is this field? check database
    # contents

    def __str__(self):
        return '%s (%s-%s)' % (self.pfam_acc, self.pfam_id, self.pfam_description)

    @property
    def name(self):
        """The Pfam family identifier"""
        return self.pfam_id

    def is_autofill_done(self):
        """Filled once the Pfam family identifier is neither None nor empty"""
        return bool(self.pfam_id)

    def autofill(self):
        """
        Set the Pfam identifier and description from the 'id' and
        'description' entries returned by get_pfam_info() for this accession
        """
        pfam_info = get_pfam_info(self.pfam_acc)
        self.pfam_id = pfam_info['id']
        self.pfam_description = pfam_info['description']

Hervé  MENAGER's avatar
Hervé MENAGER committed
307

308
class ProteinDomainComplex(models.Model):
    """
    Association between a protein and (optionally) one of its domains
    """
    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
    domain = models.ForeignKey(
        'Domain',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    ppc_copy_nb = models.IntegerField(
        verbose_name='Number of copies of the protein in the complex',
    )

    class Meta:
        verbose_name_plural = "complexes"

    def __str__(self):
        # built from the raw foreign key values, so no related row is loaded
        return '%s-%s' % (self.protein_id, self.domain_id)

    def name(self):
        """Short name of the associated protein"""
        protein = self.protein
        return protein.short_name
Hervé  MENAGER's avatar
Hervé MENAGER committed
325

326

327
class ProteinDomainBoundComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "bound complex" role
    """
    ppp_copy_nb_per_p = models.IntegerField(verbose_name=_('ppp_copy_nb_per_p'))

    class Meta:
        verbose_name_plural = "bound complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
337
338


339
class ProteinDomainPartnerComplex(ProteinDomainComplex):
    """
    Protein-Domain association playing the "partner complex" role;
    adds no field to ProteinDomainComplex
    """

    class Meta:
        verbose_name_plural = "partner complexes"
Hervé  MENAGER's avatar
Hervé MENAGER committed
346

Hervé  MENAGER's avatar
Hervé MENAGER committed
347

348
class Symmetry(models.Model):
    """
    Symmetry of a PPI: a short code and its description
    """
    code = models.CharField(verbose_name='Symmetry code', max_length=2)
    description = models.CharField(verbose_name='Description', max_length=300)

    class Meta:
        verbose_name_plural = "symmetries"

    def __str__(self):
        return '%s (%s)' % (self.code, self.description)

361
362

class Disease(models.Model):
    """
    Disease, identified by its unique name
    """
    name = models.CharField(
        verbose_name='Disease',
        max_length=30,
        unique=True,
    )

    # is there any database/nomenclature for diseases?

    def __str__(self):
        return self.name

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
370

Hervé  MENAGER's avatar
Hervé MENAGER committed
371
class PpiFamily(models.Model):
    """
    Family of PPIs, identified by its unique name
    """
    name = models.CharField(
        verbose_name='Name',
        max_length=30,
        unique=True,
    )

    class Meta:
        verbose_name_plural = "PPI Families"

    def __str__(self):
        return self.name
Hervé  MENAGER's avatar
Hervé MENAGER committed
382

Hervé  MENAGER's avatar
Hervé MENAGER committed
383

384
class Ppi(AutoFillableModel):
    """
    Protein-protein interaction (PPI)
    """
    pdb_id = models.CharField(
        verbose_name='PDB ID',
        max_length=4,
        null=True,
        blank=True,
    )
    pockets_nb = models.IntegerField(
        verbose_name='Total number of pockets in the complex',
        default=1,
    )
    symmetry = models.ForeignKey(Symmetry, on_delete=models.CASCADE)
    diseases = models.ManyToManyField(Disease, blank=True)
    family = models.ForeignKey(
        PpiFamily,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    name = models.TextField(verbose_name='PPI name', null=True, blank=True)

    def __str__(self):
        return 'PPI #%s on %s' % (self.id, self.name)

    def get_absolute_url(self):
        """URL of the detail view of this PPI ('ppi-view')"""
        return reverse('ppi-view', kwargs={'ppi_pk': self.pk})

    def is_autofill_done(self):
        """
        The name only counts as missing when it is the empty string
        (a None name is reported as done)
        """
        return self.name != ""

    def autofill(self):
        """
        Recompute the stored name from the protein names.
        The name is denormalized and stored in the database to reduce
        SQL queries in query mode.
        """
        self.name = self.compute_name_from_protein_names()

    def get_ppi_bound_complexes(self):
        """
        return the PpiComplex rows of this ppi whose complex is a bound complex
        """
        bound_complexes = ProteinDomainBoundComplex.objects.all()
        return PpiComplex.objects.filter(ppi=self, complex__in=bound_complexes)

    def compute_name_from_protein_names(self):
        """
        Build the PPI name: comma-joined short names of the bound proteins,
        followed by ' / ' and the comma-joined short names of the remaining
        (partner) proteins when there are any.
        """
        every_name = {
            link.complex.protein.short_name
            for link in self.ppicomplex_set.all()
        }
        bound_names = {
            link.complex.protein.short_name
            for link in self.get_ppi_bound_complexes()
        }
        # partners are the proteins of the PPI that are not bound
        partner_names = every_name - bound_names
        bound_label = ','.join(bound_names)
        partner_label = ','.join(partner_names)
        if partner_label == '':
            return bound_label
        return bound_label + ' / ' + partner_label
427

Hervé  MENAGER's avatar
Hervé MENAGER committed
428

Hervé  MENAGER's avatar
Hervé MENAGER committed
429
class PpiComplex(models.Model):
    """
    Link between a PPI and one of the protein-domain complexes it involves
    """
    ppi = models.ForeignKey(Ppi, on_delete=models.CASCADE)
    complex = models.ForeignKey(ProteinDomainComplex, on_delete=models.CASCADE)
    cc_nb = models.IntegerField(verbose_name=_('cc_nb_verbose_name'), default=1)

    class Meta:
        verbose_name_plural = "Ppi complexes"

    def __str__(self):
        return 'PPI %s, Complex %s (%s)' % (self.ppi, self.complex, self.cc_nb)

Hervé  MENAGER's avatar
Hervé MENAGER committed
446

447
class CompoundManager(models.Manager):
    """
    CompoundManager automatically adds a number of annotations to the results
    of the database query, used for filters and compound card
    """

    def get_queryset(self):
        """
        Return the default queryset extended with computed annotations.

        The annotations are chained in order: later ones (lipinsky_score,
        lipinsky, veber_hba_hbd, veber) refer to annotations added earlier
        in this method, so the statement order must be preserved.
        """
        # @formatter:off
        qs = super().get_queryset()
        # with number of publications
        qs = qs.annotate(pubs=Count('refcompoundbiblio', distinct=True))
        # with best activity (highest activity over the activity results)
        qs = qs.annotate(best_activity=Max('compoundactivityresult__activity'))
        # with LE: 1.37 * best activity / number of non-hydrogen atoms
        qs = qs.annotate(le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField()))
        # with LLE: best activity - AlogP
        qs = qs.annotate(lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField()))
        # Lipinsky MW (<=500)
        qs = qs.annotate(lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hba (<=10)
        qs = qs.annotate(lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField()))
        # Lipinsky hbd (<=5)
        qs = qs.annotate(lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky a_log_p (<=5)
        qs = qs.annotate(lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField()))
        # Lipinsky global: the score is the number of satisfied criteria
        # (the four booleans above cast to integers and summed);
        # the compound passes when at least 3 of them are satisfied
        qs = qs.annotate(lipinsky_score=Cast(F('lipinsky_mw'), IntegerField()) + Cast(F('lipinsky_hba'), IntegerField()) +
                         Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField()))
        qs = qs.annotate(lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField()))
        # Veber hba_hbd (<=12)
        qs = qs.annotate(hba_hbd=F('nb_acceptor_h')+F('nb_donor_h'))
        qs = qs.annotate(veber_hba_hbd=Case(When(hba_hbd__lte=12, then=True), default=False, output_field=BooleanField()))
        # Veber TPSA (<=140)
        qs = qs.annotate(veber_tpsa=Case(When(tpsa__lte=140, then=True), default=False, output_field=BooleanField()))
        # Veber Rotatable Bonds (<=10)
        qs = qs.annotate(veber_rb=Case(When(nb_rotatable_bonds__lte=10, then=True), default=False, output_field=BooleanField()))
        # Veber global (Rotatable bonds and (hba_hbd or tpsa))
        qs = qs.annotate(veber=Case(When(Q(Q(nb_rotatable_bonds__lte=10) & (Q(hba_hbd__lte=12) | Q(tpsa__lte=140))), then=True), default=False, output_field=BooleanField()))
        # Pfizer AlogP (<=3)
        qs = qs.annotate(pfizer_a_log_p=Case(When(a_log_p__lte=3, then=True), default=False, output_field=BooleanField()))
        # Pfizer TPSA (>=75)
        qs = qs.annotate(pfizer_tpsa=Case(When(tpsa__gte=75, then=True), default=False, output_field=BooleanField()))
        # Pfizer global (AlogP and TPSA)
        qs = qs.annotate(pfizer=Case(When(Q(Q(a_log_p__lte=3) & Q(tpsa__gte=75)), then=True), default=False, output_field=BooleanField()))
        # PDB ligand available: Max over a 0/1 flag, cast back to a boolean,
        # i.e. true when at least one related compound action has a ligand id
        # (the same Max-of-Case pattern is used for the *_av flags below)
        qs = qs.annotate(pdb_ligand_av=Cast(Max(Case(When(compoundaction__ligand_id__isnull=False, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition role
        qs = qs.annotate(inhibition_role=Case(When(compoundactivityresult__modulation_type='I', then=True), default=False, output_field=BooleanField()))
        # binding role
        qs = qs.annotate(binding_role=Case(When(compoundactivityresult__modulation_type='B', then=True), default=False, output_field=BooleanField()))
        # stabilisation role
        qs = qs.annotate(stabilisation_role=Case(When(compoundactivityresult__modulation_type='S', then=True), default=False, output_field=BooleanField()))
        # cellular tests performed
        qs = qs.annotate(celltest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_type='CELL', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # inhibition tests performed
        qs = qs.annotate(inhitest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='I', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # stabilisation tests performed
        qs = qs.annotate(stabtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='S', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # binding tests performed
        qs = qs.annotate(bindtest_av=Cast(Max(Case(When(compoundactivityresult__test_activity_description__test_modulation_type='B', then=1), default=0, output_field=IntegerField())), BooleanField()))
        # pharmacokinetic tests performed
        qs = qs.annotate(pktest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__pharmacokinetic=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # cytotoxicity tests performed
        qs = qs.annotate(cytoxtest_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__cytotox=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # in silico study performed
        qs = qs.annotate(insilico_av=Cast(Max(Case(When(refcompoundbiblio__bibliography__in_silico=True, then=1), default=0, output_field=IntegerField())), BooleanField()))
        # number of tests available
        qs = qs.annotate(tests_av=Count('compoundactivityresult', distinct=True))
        # @formatter:on
        return qs

Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
518

519
class Compound(AutoFillableModel):
Hervé  MENAGER's avatar
Hervé MENAGER committed
520
521
522
    """
    Chemical compound
    """
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
523
    objects = CompoundManager()
524
    canonical_smile = models.TextField(
Bryan  BRANCOTTE's avatar
typo    
Bryan BRANCOTTE committed
525
        verbose_name='Canonical Smiles',
526
527
        unique=True,
    )
528
    is_macrocycle = models.BooleanField(
Bryan  BRANCOTTE's avatar
Bryan BRANCOTTE committed
529
530
        verbose_name=_('is_macrocycle_verbose_name'),
        help_text=_('is_macrocycle_help_text'),
531
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
532
    aromatic_ratio = models.DecimalField(
533
534
535
        verbose_name='Aromatic ratio',
        max_digits=3,
        decimal_places=2,
536
537
        blank=True,
        null=True,
538
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
539
    balaban_index = models.DecimalField(
540
541
542
        verbose_name='Balaban index',
        max_digits=3,
        decimal_places=2,
543
544
        blank=True,
        null=True,
545
546
547
548
549
    )
    fsp3 = models.DecimalField(
        verbose_name='Fsp3',
        max_digits=3,
        decimal_places=2,
550
551
        blank=True,
        null=True,
552
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
553
    gc_molar_refractivity = models.DecimalField(
554
555
556
        verbose_name='GC Molar Refractivity',
        max_digits=5,
        decimal_places=2,
557
558
        blank=True,
        null=True,
559
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
560
    log_d = models.DecimalField(
561
562
563
        verbose_name='LogD (Partition coefficient octanol-1/water, with pKa information)',
        max_digits=4,
        decimal_places=2,
564
565
        blank=True,
        null=True,
566
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
567
    a_log_p = models.DecimalField(
568
569
570
        verbose_name='ALogP (Partition coefficient octanol-1/water)',
        max_digits=4,
        decimal_places=2,
571
572
        blank=True,
        null=True,
573
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
574
    mean_atom_vol_vdw = models.DecimalField(
575
576
577
        verbose_name='Mean atom volume computed with VdW radii',
        max_digits=4,
        decimal_places=2,
578
579
        blank=True,
        null=True,
580
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
581
    molecular_weight = models.DecimalField(
582
583
584
        verbose_name='Molecular weight',
        max_digits=6,
        decimal_places=2,
585
586
        blank=True,
        null=True,
587
588
589
    )
    nb_acceptor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond acceptors',
590
591
        blank=True,
        null=True,
592
593
594
    )
    nb_aliphatic_amines = models.IntegerField(
        verbose_name='Number of aliphatics amines',
595
596
        blank=True,
        null=True,
597
598
599
    )
    nb_aromatic_bonds = models.IntegerField(
        verbose_name='Number of aromatic bonds',
600
601
        blank=True,
        null=True,
602
603
604
    )
    nb_aromatic_ether = models.IntegerField(
        verbose_name='Number of aromatic ethers',
605
606
        blank=True,
        null=True,
607
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
608
    nb_aromatic_sssr = models.IntegerField(
609
        verbose_name='Number of aromatic Smallest Set of System Rings (SSSR)',
610
611
        blank=True,
        null=True,
612
613
614
    )
    nb_atom = models.IntegerField(
        verbose_name='Number of atoms',
615
616
        blank=True,
        null=True,
617
618
619
    )
    nb_atom_non_h = models.IntegerField(
        verbose_name='Number of non hydrogen atoms',
620
621
        blank=True,
        null=True,
622
623
624
    )
    nb_benzene_like_rings = models.IntegerField(
        verbose_name='Number of benzene-like rings',
625
626
        blank=True,
        null=True,
627
628
629
    )
    nb_bonds = models.IntegerField(
        verbose_name='Number of bonds',
630
631
        blank=True,
        null=True,
632
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
633
    nb_bonds_non_h = models.IntegerField(
634
        verbose_name='Number of bonds not involving a hydrogen',
635
636
        blank=True,
        null=True,
637
638
639
    )
    nb_br = models.IntegerField(
        verbose_name='Number of Bromine atoms',
640
641
        blank=True,
        null=True,
642
643
644
    )
    nb_c = models.IntegerField(
        verbose_name='Number of Carbon atoms',
645
646
        blank=True,
        null=True,
647
648
649
    )
    nb_chiral_centers = models.IntegerField(
        verbose_name='Number of chiral centers',
650
651
        blank=True,
        null=True,
652
653
654
    )
    nb_circuits = models.IntegerField(
        verbose_name='Number of circuits',
655
656
        blank=True,
        null=True,
657
658
659
    )
    nb_cl = models.IntegerField(
        verbose_name='Number of Chlorine atoms',
660
661
        blank=True,
        null=True,
662
663
664
    )
    nb_csp2 = models.IntegerField(
        verbose_name='Number of sp2-hybridized carbon atoms',
665
666
        blank=True,
        null=True,
667
668
669
    )
    nb_csp3 = models.IntegerField(
        verbose_name='Number of sp3-hybridized carbon atoms',
670
671
        blank=True,
        null=True,
672
673
674
    )
    nb_donor_h = models.IntegerField(
        verbose_name='Number of hydrogen bond donors',
675
676
        blank=True,
        null=True,
677
678
679
    )
    nb_double_bonds = models.IntegerField(
        verbose_name='Number of double bonds',
680
681
        blank=True,
        null=True,
682
683
684
    )
    nb_f = models.IntegerField(
        verbose_name='Number of fluorine atoms',
685
686
        blank=True,
        null=True,
687
688
689
    )
    nb_i = models.IntegerField(
        verbose_name='Number of iodine atoms',
690
691
        blank=True,
        null=True,
692
693
694
    )
    nb_multiple_bonds = models.IntegerField(
        verbose_name='Number of multiple bonds',
695
696
        blank=True,
        null=True,
697
698
699
    )
    nb_n = models.IntegerField(
        verbose_name='Number of nitrogen atoms',
700
701
        blank=True,
        null=True,
702
703
704
    )
    nb_o = models.IntegerField(
        verbose_name='Number of oxygen atoms',
705
706
        blank=True,
        null=True,
707
708
709
    )
    nb_rings = models.IntegerField(
        verbose_name='Number of rings',
710
711
        blank=True,
        null=True,
712
713
714
    )
    nb_rotatable_bonds = models.IntegerField(
        verbose_name='Number of rotatable bonds',
715
716
        blank=True,
        null=True,
717
718
719
    )
    inchi = models.TextField(
        verbose_name='InChi',
720
721
        blank=True,
        null=True,
722
723
724
    )
    inchikey = models.TextField(
        verbose_name='InChiKey',
725
726
        blank=True,
        null=True,
727
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
728
    randic_index = models.DecimalField(
729
730
731
        verbose_name='Randic index',
        max_digits=4,
        decimal_places=2,
732
733
        blank=True,
        null=True,
734
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
735
    rdf070m = models.DecimalField(
736
737
738
        verbose_name='RDF070m, radial distribution function weighted by the atomic masses at 7Å',
        max_digits=5,
        decimal_places=2,
739
740
        blank=True,
        null=True,
741
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
742
    rotatable_bond_fraction = models.DecimalField(
743
744
745
        verbose_name='Fraction of rotatable bonds',
        max_digits=3,
        decimal_places=2,
746
747
        blank=True,
        null=True,
748
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
749
    sum_atom_polar = models.DecimalField(
750
751
752
        verbose_name='Sum of atomic polarizabilities',
        max_digits=5,
        decimal_places=2,
753
754
        blank=True,
        null=True,
755
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
756
    sum_atom_vol_vdw = models.DecimalField(
757
758
759
        verbose_name='Sum of atom volumes computed with VdW radii',
        max_digits=6,
        decimal_places=2,
760
761
        blank=True,
        null=True,
762
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
763
    tpsa = models.DecimalField(
764
765
766
        verbose_name='Topological Polar Surface Area (TPSA)',
        max_digits=5,
        decimal_places=2,
767
768
        blank=True,
        null=True,
769
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
770
    ui = models.DecimalField(
771
772
773
        verbose_name='Unsaturation index',
        max_digits=4,
        decimal_places=2,
774
775
        blank=True,
        null=True,
776
777
778
    )
    wiener_index = models.IntegerField(
        verbose_name='Wiener index',
779
780
        blank=True,
        null=True,
781
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
782
    common_name = models.CharField(
783
784
785
786
787
        verbose_name='Common name',
        max_length=20,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
788
    pubchem_id = models.CharField(
789
790
791
792
793
        verbose_name='Pubchem ID',
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
794
    chemspider_id = models.CharField(
795
796
797
798
799
800
        verbose_name='Chemspider ID',
        unique=True,
        max_length=10,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
801
    chembl_id = models.CharField(
802
803
804
805
806
        verbose_name='Chembl ID',
        max_length=30,
        blank=True,
        null=True,
    )
Hervé  MENAGER's avatar
Hervé MENAGER committed
807
    iupac_name = models.CharField(
808
809
810
811
812
        verbose_name='IUPAC name',
        max_length=255,
        blank=True,
        null=True,
    )
813

814
    class Meta:
        # default ordering of Compound querysets: ascending id
        ordering = ['id']
816

817
818
819
    def compute_drugbank_compound_similarity(self):
        """
        compute Tanimoto similarity to existing DrugBank compounds

        The compound is saved, then its canonical SMILES is compared to the
        canonical SMILES of every DrugBankCompound. The 15 highest-scoring
        DrugBank compounds are stored as DrugbankCompoundTanimoto rows,
        created with a single bulk insert.
        """
        # NOTE(review): presumably saved first so that the rows created below
        # can reference this compound - confirm
        self.save()
        # fingerprints to compute drugbank similarities are in settings module, default FP2
        fingerprinter = FingerPrinter(getattr(settings, "DRUGBANK_FINGERPRINTS", "FP2"))
        # 1. compute tanimoto for SMILES query vs all compounds
        smiles_dict = {c.id: c.canonical_smiles for c in DrugBankCompound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
        # 2. keep the 15 best scores, highest first
        best_matches = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # 3. build the rows; the DrugBank compound is referenced through its
        # key (drugbank_compound_id) so that no extra query is issued per row
        dbcts = [
            DrugbankCompoundTanimoto(
                compound=self,
                drugbank_compound_id=drugbank_id,
                tanimoto=tanimoto,
            )
            for drugbank_id, tanimoto in best_matches
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)

Hervé  MENAGER's avatar
Hervé MENAGER committed
835
836
837
    @property
    def biblio_refs(self):
        """
        return the queryset of the RefCompoundBiblio entries
        whose compound is this compound
        """
        compound_refs = RefCompoundBiblio.objects.filter(compound=self)
        return compound_refs
841

842
843
844
845
846
847
848
849
850
851
852
853
854
    @property
    def pfam_ids(self):
        """
        return the set of PFAM ids of the domains of the complexes bound
        in the PPIs targeted by the compound actions of this compound
        """
        collected = set()
        for action in self.compoundaction_set.all():
            # NOTE(review): the return value is not used here; the call is
            # kept as in the original - confirm whether it is still needed
            action.get_complexes()
            collected.update(
                bound.complex.domain.pfam_id
                for bound in action.ppi.get_ppi_bound_complexes()
            )
        return collected

855
    @property
Hervé  MENAGER's avatar
Hervé MENAGER committed
856
    def compound_action_ligand_ids(self):
        """
        return the set of ligand ids of the compound actions
        of this compound
        """
        return {action.ligand_id for action in self.compoundaction_set.all()}
864

865
866
    @property
    def best_pXC50_activity(self):
        """
        return the maximum 'activity' value over the compound activity
        results of this compound
        """
        aggregated = self.compoundactivityresult_set.aggregate(Max('activity'))
        return aggregated['activity__max']
868
869
870
871
872
873

    @property
    def best_pXC50_compound_activity_result(self):
        """
        return the first compound activity result whose activity equals
        the best pXC50 activity, or None if there is no best activity
        """
        top_activity = self.best_pXC50_activity
        if top_activity is not None:
            matching = self.compoundactivityresult_set.filter(activity=top_activity)
            return matching[0]
        return None
875

876
    @property
877
    def best_pXC50_activity_ppi_name(self):
        """
        return the name of the PPI of the test activity description of the
        best pXC50 compound activity result, or None if there is no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            return top_result.test_activity_description.ppi.name
        return None

887
888
889
890
891
892
893
894
895
896
897
    @property
    def best_pXC50_activity_ppi_family(self):
        """
        return the name of the family of the PPI of the test activity
        description of the best pXC50 compound activity result, or None if
        there is no such result
        """
        top_result = self.best_pXC50_compound_activity_result
        if top_result is not None:
            return top_result.test_activity_description.ppi.family.name
        return None

898
899
900
901
902
903
904
905
906
907
908
909
910
911
    @property
    def bioch_tests_count(self):
        """
        return the number of compound activity results of this compound
        whose test activity description has the test type 'BIOCH'
        """
        all_results = self.compoundactivityresult_set.all()
        bioch_results = all_results.filter(test_activity_description__test_type='BIOCH')
        return bioch_results.count()

    @property
    def cell_tests_count(self):
        """
        return the number of compound activity results of this compound
        whose test activity description has the test type 'CELL'
        """
        all_results = self.compoundactivityresult_set.all()
        cell_results = all_results.filter(test_activity_description__test_type='CELL')
        return cell_results.count()

912