Commit 24d27dfa authored by Hervé  MENAGER's avatar Hervé MENAGER
Browse files

cache many compound properties in the Compound model table

Warning: these properties must now be updated by running the new
admin command `cache_compound_properties`.
parent 43e7bfdf
Pipeline #12166 failed with stages
in 16 minutes and 34 seconds
from django.core.management import BaseCommand
from ippidb.models import update_compound_cached_properties
class Command(BaseCommand):
    """Management command that refreshes the cached compound properties.

    The work is delegated to ``update_compound_cached_properties``; the value
    it returns is reported as the number of compounds processed.
    """

    help = "Cache compound properties in the main 'Compound' table"

    def handle(self, *args, **options):
        """Run the cache update and write start/end messages to stdout."""
        start_message = self.style.SUCCESS('Generating the compound properties cache...')
        self.stdout.write(start_message)

        updated_count = update_compound_cached_properties()

        end_message = 'Successfully generated compound properties cache for %s compounds' % updated_count
        self.stdout.write(self.style.SUCCESS(end_message))
\ No newline at end of file
......@@ -20,10 +20,10 @@ class Command(BaseCommand):
if comp.le is not None:
le = round(comp.le, 7)
lle = round(comp.lle, 7)
le_lle_data.append({'x': le, 'y': lle, 'id': comp.id, 'family_name': comp.best_pXC50_activity_ppi_family, 'smiles': comp.canonical_smile})
le_lle_data.append({'x': le, 'y': lle, 'id': comp.id, 'family_name': comp.best_activity_ppi_family_name, 'smiles': comp.canonical_smile})
else:
self.stdout.write(
self.style.WARNING('compound %s has no LE (probably because no pXC50 activity results have been registered)' % comp.id))
self.style.WARNING('compound %s has no LE' % comp.id))
le_lle_json = json.dumps(le_lle_data, separators=(',',':'))
new = LeLleBiplotData()
new.le_lle_biplot_data = le_lle_json
......
......@@ -45,7 +45,7 @@ class Command(BaseCommand):
l = []
for comp in Compound.objects.all():
values = model_to_dict(comp, fields=features + ['id','family'])
values['family'] = comp.best_pXC50_activity_ppi_family
values['family'] = comp.best_activity_ppi_family_name
l.append(values)
df = pd.DataFrame(l)
x = df.loc[:, features].values
......
# Generated by Django 2.0.13 on 2019-05-24 14:56
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine ``authors_list``, ``journal_name`` and ``title`` of the
    ``bibliography`` model as ``TextField`` columns."""

    dependencies = [('ippidb', '0022_auto_20190319_1537')]

    # One AlterField per (column, new field definition) pair, in this order.
    operations = [
        migrations.AlterField(
            model_name='bibliography',
            name=column,
            field=text_field,
        )
        for column, text_field in (
            ('authors_list', models.TextField(verbose_name='Authors list')),
            ('journal_name', models.TextField(blank=True, null=True, verbose_name='Journal name')),
            ('title', models.TextField(verbose_name='Title')),
        )
    ]
# Generated by Django 2.2.1 on 2019-06-02 15:46
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable cached-property columns to the ``compound`` model.

    Every operation is an ``AddField`` on ``compound``; the table below lists
    the column name and its field definition in the order they are applied.
    """

    dependencies = [('ippidb', '0023_auto_20190524_1456')]

    operations = [
        migrations.AddField(
            model_name='compound',
            name=column,
            field=cached_field,
        )
        for column, cached_field in (
            ('best_activity', models.DecimalField(blank=True, decimal_places=10, max_digits=12, null=True, verbose_name='Best activity')),
            ('binding_role', models.BooleanField(blank=True, null=True, verbose_name='Binding role')),
            ('bindtest_av', models.BooleanField(blank=True, null=True, verbose_name='Binding tests performed')),
            ('celltest_av', models.BooleanField(blank=True, null=True, verbose_name='Cellular tests performed')),
            ('cytoxtest_av', models.BooleanField(blank=True, null=True, verbose_name='Cytotoxicity tests performed')),
            ('hba_hbd', models.IntegerField(blank=True, null=True, verbose_name='Sum of Hydrogen bond acceptors and donors')),
            ('inhibition_role', models.BooleanField(blank=True, null=True, verbose_name='Inhibition role')),
            ('inhitest_av', models.BooleanField(blank=True, null=True, verbose_name='Inhibition tests performed')),
            ('insilico_av', models.BooleanField(blank=True, null=True, verbose_name='In silico tests performed')),
            ('le', models.FloatField(blank=True, null=True, verbose_name='Ligand efficiency')),
            ('lipinsky', models.BooleanField(blank=True, null=True, verbose_name='Lipinsky ok')),
            ('lipinsky_a_log_p', models.BooleanField(blank=True, null=True, verbose_name='A log P ok for Lipinsky')),
            ('lipinsky_hba', models.BooleanField(blank=True, null=True, verbose_name='Hydrogen bond acceptors ok for Lipinsky')),
            ('lipinsky_hbd', models.BooleanField(blank=True, null=True, verbose_name='Hydrogen bond donors ok for Lipinsky')),
            ('lipinsky_mw', models.BooleanField(blank=True, null=True, verbose_name='MW ok for Lipinsky')),
            ('lipinsky_score', models.IntegerField(blank=True, null=True, verbose_name='Lipinsky score')),
            ('lle', models.FloatField(blank=True, null=True, verbose_name='Lipophilic efficiency')),
            ('pdb_ligand_av', models.BooleanField(blank=True, null=True, verbose_name='PDB ligand available')),
            ('pfizer', models.BooleanField(blank=True, null=True, verbose_name='Pfizer ok')),
            ('pfizer_a_log_p', models.BooleanField(blank=True, null=True, verbose_name='A log P ok for Pfizer')),
            ('pfizer_tpsa', models.BooleanField(blank=True, null=True, verbose_name='TPSA ok for Pfizer')),
            ('pktest_av', models.BooleanField(blank=True, null=True, verbose_name='Pharmacokinetic tests performed')),
            ('pubs', models.IntegerField(blank=True, null=True, verbose_name='Number of publications')),
            ('stabilisation_role', models.BooleanField(blank=True, null=True, verbose_name='Stabilisation role')),
            ('tests_av', models.IntegerField(blank=True, null=True, verbose_name='Number of tests available')),
            ('veber', models.BooleanField(blank=True, null=True, verbose_name='Veber ok')),
            ('veber_hba_hbd', models.BooleanField(blank=True, null=True, verbose_name='HBA+HBD ok for Veber')),
            ('veber_rb', models.BooleanField(blank=True, null=True, verbose_name='Rotatable bonds ok for Veber')),
            ('veber_tpsa', models.BooleanField(blank=True, null=True, verbose_name='TPSA ok for Veber')),
        )
    ]
# Generated by Django 2.2.1 on 2019-06-03 14:42
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable boolean ``stabtest_av`` column to ``compound``."""

    dependencies = [('ippidb', '0024_auto_20190602_1546')]

    operations = [
        migrations.AddField(
            model_name='compound',
            name='stabtest_av',
            field=models.BooleanField(
                blank=True,
                null=True,
                verbose_name='Stabilisation tests performed',
            ),
        ),
    ]
# Generated by Django 2.2.1 on 2019-06-04 19:37
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add an index on ``compound.molecular_weight``."""

    dependencies = [('ippidb', '0025_compound_stabtest_av')]

    operations = [
        migrations.AddIndex(
            model_name='compound',
            index=models.Index(
                fields=['molecular_weight'],
                name='ippidb_comp_molecul_6c82bb_idx',
            ),
        ),
    ]
# Generated by Django 2.2.1 on 2019-06-04 19:47
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add single-column indexes on nine ``compound`` fields.

    The table below pairs each indexed column with its index name, in the
    order the ``AddIndex`` operations are applied.
    """

    dependencies = [('ippidb', '0026_auto_20190604_1937')]

    operations = [
        migrations.AddIndex(
            model_name='compound',
            index=models.Index(fields=[column], name=index_name),
        )
        for column, index_name in (
            ('a_log_p', 'ippidb_comp_a_log_p_d6ed66_idx'),
            ('nb_donor_h', 'ippidb_comp_nb_dono_0dff51_idx'),
            ('nb_acceptor_h', 'ippidb_comp_nb_acce_2d9049_idx'),
            ('tpsa', 'ippidb_comp_tpsa_4a45ab_idx'),
            ('nb_rotatable_bonds', 'ippidb_comp_nb_rota_c38661_idx'),
            ('nb_aromatic_sssr', 'ippidb_comp_nb_arom_de89e2_idx'),
            ('nb_chiral_centers', 'ippidb_comp_nb_chir_d779eb_idx'),
            ('fsp3', 'ippidb_comp_fsp3_bfbbb7_idx'),
            ('pubs', 'ippidb_comp_pubs_168d42_idx'),
        )
    ]
# Generated by Django 2.2.1 on 2019-06-04 20:58
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add ``best_activity_ppi_family_name`` (declared unique in this
    migration) to ``compound`` and index ``best_activity``, ``le`` and
    ``lle``."""

    dependencies = [('ippidb', '0027_auto_20190604_1947')]

    # The new column comes first, followed by the three indexes.
    operations = [
        migrations.AddField(
            model_name='compound',
            name='best_activity_ppi_family_name',
            field=models.CharField(
                blank=True,
                max_length=30,
                null=True,
                unique=True,
                verbose_name='Best activity PPI family name',
            ),
        ),
    ] + [
        migrations.AddIndex(
            model_name='compound',
            index=models.Index(fields=[column], name=index_name),
        )
        for column, index_name in (
            ('best_activity', 'ippidb_comp_best_ac_98454a_idx'),
            ('le', 'ippidb_comp_le_e4dac5_idx'),
            ('lle', 'ippidb_comp_lle_1f7980_idx'),
        )
    ]
# Generated by Django 2.2.1 on 2019-06-05 06:06
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine ``compound.best_activity_ppi_family_name`` as a nullable
    30-character ``CharField`` without a ``unique`` option."""

    dependencies = [('ippidb', '0028_auto_20190604_2058')]

    operations = [
        migrations.AlterField(
            model_name='compound',
            name='best_activity_ppi_family_name',
            field=models.CharField(
                blank=True,
                max_length=30,
                null=True,
                verbose_name='Best activity PPI family name',
            ),
        ),
    ]
......@@ -12,7 +12,7 @@ from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError
from django.db import models, transaction
from django.db.models import FloatField, IntegerField, BooleanField
from django.db.models import Max, Count, F, Q, Case, When
from django.db.models import Max, Count, F, Q, Case, When, Subquery, OuterRef
from django.db.models.functions import Cast
from django.urls import reverse
from django.utils.translation import ugettext_lazy as _
......@@ -450,7 +450,7 @@ class CompoundManager(models.Manager):
of the database query, used for filters and compound card
"""
def get_queryset(self):
def get_queryset_old(self):
# @formatter:off
qs = super().get_queryset()
# with number of publications
......@@ -811,8 +811,77 @@ class Compound(AutoFillableModel):
null=True,
)
pubs = models.IntegerField(
verbose_name='Number of publications', null=True, blank=True
)
best_activity = models.DecimalField(
'Best activity', max_digits=12, decimal_places=10, null=True, blank=True)
best_activity_ppi_family_name = models.CharField('Best activity PPI family name', max_length=30, null=True, blank=True)
le = models.FloatField(
verbose_name='Ligand efficiency', null=True, blank=True
)
lle = models.FloatField(
verbose_name='Lipophilic efficiency', null=True, blank=True
)
lipinsky_mw = models.BooleanField('MW ok for Lipinsky', null=True, blank=True)
lipinsky_hba = models.BooleanField('Hydrogen bond acceptors ok for Lipinsky', null=True, blank=True)
lipinsky_hbd = models.BooleanField('Hydrogen bond donors ok for Lipinsky', null=True, blank=True)
lipinsky_a_log_p = models.BooleanField('A log P ok for Lipinsky', null=True, blank=True)
lipinsky_score = models.IntegerField(
verbose_name='Lipinsky score', null=True, blank=True
)
lipinsky = models.BooleanField('Lipinsky ok', null=True, blank=True)
hba_hbd = models.IntegerField(
verbose_name='Sum of Hydrogen bond acceptors and donors', null=True, blank=True
)
veber_hba_hbd = models.BooleanField('HBA+HBD ok for Veber', null=True, blank=True)
veber_tpsa = models.BooleanField('TPSA ok for Veber', null=True, blank=True)
veber_rb = models.BooleanField('Rotatable bonds ok for Veber', null=True, blank=True)
veber = models.BooleanField('Veber ok', null=True, blank=True)
pfizer_a_log_p = models.BooleanField('A log P ok for Pfizer', null=True, blank=True)
pfizer_tpsa = models.BooleanField('TPSA ok for Pfizer', null=True, blank=True)
pfizer = models.BooleanField('Pfizer ok', null=True, blank=True)
pdb_ligand_av = models.BooleanField('PDB ligand available', null=True, blank=True)
inhibition_role = models.BooleanField('Inhibition role', null=True, blank=True)
binding_role = models.BooleanField('Binding role', null=True, blank=True)
stabilisation_role = models.BooleanField('Stabilisation role', null=True, blank=True)
celltest_av = models.BooleanField('Cellular tests performed', null=True, blank=True)
inhitest_av = models.BooleanField('Inhibition tests performed', null=True, blank=True)
stabtest_av = models.BooleanField('Stabilisation tests performed', null=True, blank=True)
bindtest_av = models.BooleanField('Binding tests performed', null=True, blank=True)
pktest_av = models.BooleanField('Pharmacokinetic tests performed', null=True, blank=True)
cytoxtest_av = models.BooleanField('Cytotoxicity tests performed', null=True, blank=True)
insilico_av = models.BooleanField('In silico tests performed', null=True, blank=True)
tests_av = models.IntegerField(verbose_name='Number of tests available', null=True, blank=True)
class Meta:
    # Default ordering is by primary key.
    ordering = ['id']
    # One single-column index per listed field, in this order.
    indexes = [
        models.Index(fields=[column])
        for column in (
            'molecular_weight',
            'a_log_p',
            'nb_donor_h',
            'nb_acceptor_h',
            'tpsa',
            'nb_rotatable_bonds',
            'nb_aromatic_sssr',
            'nb_chiral_centers',
            'fsp3',
            'pubs',
            'best_activity',
            'le',
            'lle',
        )
    ]
# indexes = [
# models.Index(fields=['lipinsky']),
# models.Index(fields=['veber']),
# models.Index(fields=['pfizer']),
# models.Index(fields=['pdb_ligand_av']),
# models.Index(fields=['inhibition_role']),
# models.Index(fields=['binding_role']),
# models.Index(fields=['stabilisation_role']),
# models.Index(fields=['binding_role']),
# ]
def compute_drugbank_compound_similarity(self):
""" compute Tanimoto similarity to existing DrugBank compounds """
......@@ -868,33 +937,11 @@ class Compound(AutoFillableModel):
@property
def best_pXC50_compound_activity_result(self):
    """
    Activity result whose activity equals the compound's ``best_activity``.

    Returns ``None`` when ``best_activity`` is not set, and also when no
    activity result of this compound carries that value (for instance if
    the stored ``best_activity`` is out of date).
    """
    best_activity = self.best_activity
    if best_activity is None:
        return None
    # first() returns None on an empty match, where [0] raised IndexError
    return self.compoundactivityresult_set.filter(activity=best_activity).first()
@property
def best_pXC50_activity_ppi_name(self):
    """
    PPI name attached to the best pXC50 activity result, or ``None`` when
    the compound has no such result.
    """
    result = self.best_pXC50_compound_activity_result
    if result is not None:
        return result.test_activity_description.ppi.name
    return None
@property
def best_pXC50_activity_ppi_family(self):
    """
    Family name of the PPI attached to the best pXC50 activity result, or
    ``None`` when the compound has no such result.
    """
    result = self.best_pXC50_compound_activity_result
    if result is not None:
        return result.test_activity_description.ppi.family.name
    return None
@property
def bioch_tests_count(self):
"""
......@@ -1296,3 +1343,225 @@ class Contribution(models.Model):
def get_absolute_url(self):
    """Return the URL reversed from the ``contribution-detail`` route for this object's primary key."""
    url_kwargs = {'contribution_pk': self.pk}
    return reverse('contribution-detail', kwargs=url_kwargs)
def update_compound_cached_properties():
return Compound.objects.update(
pubs=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_pubs=Count('refcompoundbiblio', distinct=True)
).values('_pubs')[:1]
),
best_activity=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_best_activity=Max('compoundactivityresult__activity')
).values('_best_activity')[:1]
),
best_activity_ppi_family_name = Subquery(
CompoundActivityResult.objects.filter(
compound_id=OuterRef('id')
).filter(activity=OuterRef('best_activity')).annotate(
_best_activity_ppi_family_name=F('test_activity_description__ppi__family__name')
).values('_best_activity_ppi_family_name')[:1]
),
le=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_le=Cast(1.37 * Max('compoundactivityresult__activity') / F('nb_atom_non_h'), FloatField())
).values('_le')[:1]
),
lle=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lle=Cast(Max('compoundactivityresult__activity') - F('a_log_p'), FloatField())
).values('_lle')[:1]
),
lipinsky_mw=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky_mw=Case(When(molecular_weight__lte=500, then=True), default=False, output_field=BooleanField())
).values('_lipinsky_mw')[:1]
),
lipinsky_hba=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky_hba=Case(When(nb_acceptor_h__lte=10, then=True), default=False, output_field=BooleanField())
).values('_lipinsky_hba')[:1]
),
lipinsky_hbd=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky_hbd=Case(When(nb_donor_h__lte=5, then=True), default=False, output_field=BooleanField())
).values('_lipinsky_hbd')[:1]
),
lipinsky_a_log_p=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky_a_log_p=Case(When(a_log_p__lte=5, then=True), default=False, output_field=BooleanField())
).values('_lipinsky_a_log_p')[:1]
),
lipinsky_score=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky_score=Cast(F('lipinsky_mw'), IntegerField()) + Cast(F('lipinsky_hba'), IntegerField()) +
Cast(F('lipinsky_hbd'), IntegerField()) + Cast(F('lipinsky_a_log_p'), IntegerField())
).values('_lipinsky_score')[:1]
),
lipinsky=Subquery(
Compound.objects.filter(
id=OuterRef('id')
).annotate(
_lipinsky=Case(When(lipinsky_score__gte=3, then=True), default=False, output_field=BooleanField())
).values('_lipinsky')[:1]
),
hba_hbd=Subquery(
Compound.objects.filter(<