Commit d27302bb authored by Hervé MENAGER
Browse files

finalize drugbank compounds similarity computation


Former-commit-id: 5f1bf6c840c68c4dbcc059f24b102d10efed5579
parent 72cd14f3
2de38b2f569a1cecc7dd4e6dd360602d2f81e8a5
\ No newline at end of file
b0dbcad73864aeff81c728c88cb5fbafb0d40427
\ No newline at end of file
......@@ -30,7 +30,7 @@ class Command(BaseCommand):
dbc.id = row.loc['DRUGBANK_ID']
dbc.common_name = row.loc['COMMON_NAME']
dbc.canonical_smiles = row.loc['CanSmile']
dbc.save()
dbc.save(autofill=True)
except Exception:
self.stdout.write(
self.style.ERROR('Failed inserting {}'.format(row.loc['DRUGBANK_ID'])))
......@@ -40,19 +40,4 @@ class Command(BaseCommand):
'Failed inserting {}'.format(row.loc['DRUGBANK_ID']))
else:
self.stdout.write(
self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID'])))
DrugbankCompoundTanimoto.objects.all().delete()
self.stdout.write(
self.style.SUCCESS('Successfully flushed DrugBank Compound Tanimoto table'))
smiles_dict = {c.id:c.canonical_smiles for c in DrugBankCompound.objects.all()}
for c in Compound.objects.all():
fingerprinter = FingerPrinter("ECFP4")
#1. compute tanimoto for SMILES query vs all compounds
tanimoto_dict = fingerprinter.tanimoto_smiles(c.canonical_smile, smiles_dict)
tanimoto_dict = dict(sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15])
dbcts = []
for id_, tanimoto in tanimoto_dict.items():
dbcts.append(DrugbankCompoundTanimoto(compound=c, drugbank_compound=DrugBankCompound.objects.get(id=id_), tanimoto=tanimoto))
DrugbankCompoundTanimoto.objects.bulk_create(dbcts)
self.stdout.write(
self.style.SUCCESS('Successfully inserted {} DrugBank Compound Tanimoto lines for Compound {}'.format(len(dbcts), c.id)))
self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID'])))
\ No newline at end of file
from __future__ import unicode_literals
import operator
from django.db import models
from django.db.models import Max
......@@ -340,6 +341,19 @@ class Compound(AutoFillableModel):
class Meta:
ordering = ['id']
def compute_drugbank_compound_similarity(self):
    """Compute Tanimoto similarity to existing DrugBank compounds.

    Saves this compound, scores its ``canonical_smile`` against the
    canonical SMILES of every DrugBank compound using the "FP4"
    fingerprinter, and inserts one ``DrugbankCompoundTanimoto`` row for
    each of the 15 highest-scoring DrugBank compounds (fewer when fewer
    scores are returned).

    NOTE(review): rows are only ever added here; nothing visible in this
    method removes rows left by a previous call for the same compound.
    """
    # The similarity rows point at this compound through a foreign key,
    # so it has to be persisted before they are created.
    self.save()
    fingerprinter = FingerPrinter("FP4")
    # Only the primary key and the SMILES string are needed, so fetch just
    # those two columns instead of building full model instances.
    smiles_dict = dict(
        DrugBankCompound.objects.values_list('id', 'canonical_smiles'))
    tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smile, smiles_dict)
    # Keep the 15 best scores; the sorted list is iterated directly, there
    # is no need to turn it back into a dict.
    closest = sorted(
        tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
    # The keys are DrugBank compound ids, so the foreign key is set through
    # its ``_id`` attribute rather than issuing one SELECT per row.
    dbcts = [
        DrugbankCompoundTanimoto(
            compound=self, drugbank_compound_id=id_, tanimoto=tanimoto)
        for id_, tanimoto in closest
    ]
    DrugbankCompoundTanimoto.objects.bulk_create(dbcts)
@property
def biblio_refs(self):
"""
......@@ -505,6 +519,7 @@ class Compound(AutoFillableModel):
# compute InChi and InChiKey
self.inchi = smi2inchi(self.canonical_smile)
self.inchikey = smi2inchikey(self.canonical_smile)
self.compute_drugbank_compound_similarity()
def __str__(self):
return 'Compound #{}'.format(self.id)
......@@ -734,13 +749,29 @@ class RefCompoundBiblio(models.Model):
class Meta:
unique_together = (('compound', 'bibliography'),)
class DrugBankCompound(models.Model):
class DrugBankCompound(AutoFillableModel):
    """A DrugBank compound: DrugBank ID, common name and canonical SMILES."""

    # The DrugBank ID doubles as the primary key.
    id = models.TextField(
        'Drugbank ID', unique=True, primary_key=True)
    common_name = models.TextField('Common name')
    canonical_smiles = models.TextField(
        'Canonical SMILES')

    def autofill(self):
        """Compute the similarity rows for this DrugBank compound.

        NOTE(review): presumably invoked by ``save(autofill=True)`` on
        ``AutoFillableModel`` -- the base class is not visible here.
        """
        self.compute_compound_similarity()

    def compute_compound_similarity(self):
        """Compute Tanimoto similarity to existing compounds.

        Saves this DrugBank compound, scores its ``canonical_smiles``
        against the ``canonical_smile`` of every ``Compound`` using the
        "FP4" fingerprinter, and inserts one ``DrugbankCompoundTanimoto``
        row for each of the 15 highest-scoring compounds (fewer when fewer
        scores are returned).
        """
        # The similarity rows point at this object through a foreign key,
        # so it has to be persisted before they are created.
        self.save()
        fingerprinter = FingerPrinter("FP4")
        smiles_dict = {c.id: c.canonical_smile for c in Compound.objects.all()}
        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smiles, smiles_dict)
        # Keep the 15 best scores; the sorted list is iterated directly,
        # there is no need to turn it back into a dict.
        closest = sorted(
            tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
        # The keys are Compound ids, so the foreign key is set through its
        # ``_id`` attribute rather than issuing one SELECT per row.
        dbcts = [
            DrugbankCompoundTanimoto(
                compound_id=id_, drugbank_compound=self, tanimoto=tanimoto)
            for id_, tanimoto in closest
        ]
        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)
class DrugbankCompoundTanimoto(models.Model):
compound = models.ForeignKey(Compound, models.CASCADE)
drugbank_compound = models.ForeignKey(DrugBankCompound, models.CASCADE)
......
......@@ -4,6 +4,7 @@ from django.test import TestCase
from openbabel import vectorUnsignedInt, OBFingerprint
from .models import Compound, CompoundTanimoto, create_tanimoto
from .models import DrugBankCompound, DrugbankCompoundTanimoto
from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
class MolSmiTestCase(TestCase):
......@@ -87,6 +88,64 @@ class FingerPrinterTestCaseCompound1ECFP4(TestCase):
def test_tanimoto_smiles(self):
self.assertEqual(self.fingerprinter.tanimoto_smiles(self.smiles, self.smiles_dict), self.tanimoto_dict)
def create_dummy_compound(id_, smiles):
    """Create and save a ``Compound`` test fixture.

    The compound gets the given id and canonical SMILES, is flagged as a
    macrocycle, and has every descriptor listed below set to zero before
    being saved with ``autofill=True``.
    """
    # Descriptors the fixture sets to the float 0.0.
    zero_float_fields = (
        'aromatic_ratio',
        'balaban_index',
        'fsp3',
        'gc_molar_refractivity',
        'log_d',
        'a_log_p',
        'mean_atom_vol_vdw',
        'molecular_weight',
    )
    # Descriptors the fixture sets to the integer 0.
    zero_int_fields = (
        'nb_acceptor_h',
        'nb_aliphatic_amines',
        'nb_aromatic_bonds',
        'nb_aromatic_ether',
        'nb_aromatic_sssr',
        'nb_atom',
        'nb_atom_non_h',
        'nb_benzene_like_rings',
        'nb_bonds',
        'nb_bonds_non_h',
        'nb_br',
        'nb_c',
        'nb_chiral_centers',
        'nb_circuits',
        'nb_cl',
        'nb_csp2',
        'nb_csp3',
        'nb_donor_h',
        'nb_double_bonds',
        'nb_f',
        'nb_i',
        'nb_multiple_bonds',
        'nb_n',
        'nb_o',
        'nb_rings',
        'nb_rotatable_bonds',
        'randic_index',
        'rdf070m',
        'rotatable_bond_fraction',
        'sum_atom_polar',
        'sum_atom_vol_vdw',
        'tpsa',
        'ui',
        'wiener_index',
    )
    compound = Compound()
    compound.id = id_
    compound.canonical_smile = smiles
    compound.is_macrocycle = True
    for field_name in zero_float_fields:
        setattr(compound, field_name, 0.0)
    for field_name in zero_int_fields:
        setattr(compound, field_name, 0)
    compound.save(autofill=True)
def create_dummy_drugbank_compound(id_, smiles):
    """Create and save a ``DrugBankCompound`` test fixture.

    The common name is derived from the id; the object is saved with
    ``autofill=True``.
    """
    common_name = 'DrugBankCompound' + str(id_)
    drugbank_compound = DrugBankCompound(
        id=id_,
        common_name=common_name,
        canonical_smiles=smiles,
    )
    drugbank_compound.save(autofill=True)
class CompoundTanimotoTestCase(TestCase):
......@@ -94,54 +153,7 @@ class CompoundTanimotoTestCase(TestCase):
self.smiles_dict = {1:"CC", 2:"CCC"}
self.smiles_query = "CC"
for id_, smiles in self.smiles_dict.items():
c = Compound()
c.id = id_
c.canonical_smile = smiles
c.is_macrocycle=True
c.aromatic_ratio = 0.0
c.balaban_index = 0.0
c.fsp3 = 0.0
c.gc_molar_refractivity = 0.0
c.log_d = 0.0
c.a_log_p = 0.0
c.gc_molar_refractivity = 0.0
c.mean_atom_vol_vdw = 0.0
c.molecular_weight = 0.0
c.nb_acceptor_h = 0
c.nb_aliphatic_amines = 0
c.nb_aromatic_bonds = 0
c.nb_aromatic_ether = 0
c.nb_aromatic_sssr = 0
c.nb_atom = 0
c.nb_atom_non_h = 0
c.nb_benzene_like_rings = 0
c.nb_bonds = 0
c.nb_bonds_non_h = 0
c.nb_br = 0
c.nb_c = 0
c.nb_chiral_centers = 0
c.nb_circuits = 0
c.nb_cl = 0
c.nb_csp2 = 0
c.nb_csp3 = 0
c.nb_donor_h = 0
c.nb_double_bonds = 0
c.nb_f = 0
c.nb_i = 0
c.nb_multiple_bonds = 0
c.nb_n = 0
c.nb_o = 0
c.nb_rings = 0
c.nb_rotatable_bonds = 0
c.randic_index = 0
c.rdf070m = 0
c.rotatable_bond_fraction = 0
c.sum_atom_polar = 0
c.sum_atom_vol_vdw = 0
c.tpsa = 0
c.ui = 0
c.wiener_index = 0
c.save()
create_dummy_compound(id_, smiles)
def test_create(self):
create_tanimoto(self.smiles_query,"FP4")
......@@ -152,62 +164,42 @@ class CompoundTanimotoTestCase(TestCase):
class CompoundTanimotoTestCaseCompound1ECFP4(TestCase):
def setUp(self):
self.smiles_dict = {1:"CC(C)C(=O)c1cc(C(=O)c2ccc(Oc3ccccc3)cc2)c(O)c(O)c1O", 2:"NC(=N)N[C@H](C1CCCCC1)C(=O)NCC(=O)N1CCC(CC1)c1cc(n[nH]1)-c1ccc(Cl)cc1Cl"}
self.smiles_query = "CC(C)C(=O)c1cc(C(=O)c2ccc(Oc3ccccc3)cc2)c(O)c(O)c1O"
for id_, smiles in self.smiles_dict.items():
c = Compound()
c.id = id_
c.canonical_smile = smiles
c.is_macrocycle=True
c.aromatic_ratio = 0.0
c.balaban_index = 0.0
c.fsp3 = 0.0
c.gc_molar_refractivity = 0.0
c.log_d = 0.0
c.a_log_p = 0.0
c.gc_molar_refractivity = 0.0
c.mean_atom_vol_vdw = 0.0
c.molecular_weight = 0.0
c.nb_acceptor_h = 0
c.nb_aliphatic_amines = 0
c.nb_aromatic_bonds = 0
c.nb_aromatic_ether = 0
c.nb_aromatic_sssr = 0
c.nb_atom = 0
c.nb_atom_non_h = 0
c.nb_benzene_like_rings = 0
c.nb_bonds = 0
c.nb_bonds_non_h = 0
c.nb_br = 0
c.nb_c = 0
c.nb_chiral_centers = 0
c.nb_circuits = 0
c.nb_cl = 0
c.nb_csp2 = 0
c.nb_csp3 = 0
c.nb_donor_h = 0
c.nb_double_bonds = 0
c.nb_f = 0
c.nb_i = 0
c.nb_multiple_bonds = 0
c.nb_n = 0
c.nb_o = 0
c.nb_rings = 0
c.nb_rotatable_bonds = 0
c.randic_index = 0
c.rdf070m = 0
c.rotatable_bond_fraction = 0
c.sum_atom_polar = 0
c.sum_atom_vol_vdw = 0
c.tpsa = 0
c.ui = 0
c.wiener_index = 0
c.save()
create_dummy_compound(id_, smiles)
def test_create(self):
create_tanimoto(self.smiles_query,"ECFP4")
ct = CompoundTanimoto.objects.get(id=1,canonical_smiles=self.smiles_query)
ct2 = CompoundTanimoto.objects.get(id=2,canonical_smiles=self.smiles_query)
self.assertEqual(ct.tanimoto, 1.0)
self.assertEqual(float(ct2.tanimoto),0.0971)
\ No newline at end of file
self.assertEqual(float(ct2.tanimoto),0.0971)
class CompoundDrugBankCompoundTanimotoTest(TestCase):
    """Check the Compound / DrugBank compound similarity rows.

    The same rows are expected whichever side is created first.
    """

    def setUp(self):
        self.compound_smiles_dict = {1:"CC", 2:"CCC"}
        self.drugbankcompound_smiles_dict = {1:"CC"}

    def _create_compounds(self):
        """Create one dummy compound per entry of the compound fixture dict."""
        for compound_id, smiles in self.compound_smiles_dict.items():
            create_dummy_compound(compound_id, smiles)

    def _create_drugbank_compounds(self):
        """Create one dummy DrugBank compound per entry of its fixture dict."""
        for drugbank_id, smiles in self.drugbankcompound_smiles_dict.items():
            create_dummy_drugbank_compound(drugbank_id, smiles)

    def _assert_expected_similarities(self):
        """Assert the stored similarity of compounds 1 and 2 to DrugBank compound 1."""
        for compound_id, expected in ((1, 1.0), (2, 0.5)):
            row = DrugbankCompoundTanimoto.objects.get(
                compound_id=compound_id, drugbank_compound_id=1)
            self.assertEqual(row.tanimoto, expected)

    def test_create_compound_first(self):
        self._create_compounds()
        self._create_drugbank_compounds()
        self._assert_expected_similarities()

    def test_create_drugbank_compound_first(self):
        self._create_drugbank_compounds()
        self._create_compounds()
        self._assert_expected_similarities()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment