From cd6077093130e7b0bb43ea662a76c12916ac1c46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr>
Date: Sun, 25 Nov 2018 19:42:35 +0100
Subject: [PATCH] fix drugbank similarities computation

see #96


Former-commit-id: 7f0a26180c2fb18638e0293c5ecbc123b4a9271f
---
 .../management/commands/import_drugbank.py     |  9 +++++++--
 ippisite/ippidb/models.py                      | 18 +-----------------
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/ippisite/ippidb/management/commands/import_drugbank.py b/ippisite/ippidb/management/commands/import_drugbank.py
index 7d024e49..bb470cff 100644
--- a/ippisite/ippidb/management/commands/import_drugbank.py
+++ b/ippisite/ippidb/management/commands/import_drugbank.py
@@ -25,12 +25,13 @@ class Command(BaseCommand):
         self.stdout.write(
             self.style.SUCCESS('Successfully flushed DrugBank Compound table'))
         for index, row in df.iterrows():
+            # insert all DrugBank compounds into the DB
             try:
                 dbc = DrugBankCompound()
                 dbc.id = row.loc['DRUGBANK_ID']
                 dbc.common_name = row.loc['COMMON_NAME']
                 dbc.canonical_smiles = row.loc['CanSmile']
-                dbc.save(autofill=True)
+                dbc.save()
             except Exception:
                 self.stdout.write(
                     self.style.ERROR('Failed inserting {}'.format(row.loc['DRUGBANK_ID'])))
@@ -40,4 +41,8 @@ class Command(BaseCommand):
                     'Failed inserting {}'.format(row.loc['DRUGBANK_ID']))
             else:
                 self.stdout.write(
-                    self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID'])))
\ No newline at end of file
+                    self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID'])))
+        for c in Compound.objects.all():
+            # for each iPPI-DB compound, compute its most similar DrugBank compounds
+            c.save(autofill=True)
+            self.stdout.write(self.style.SUCCESS('Successfully computed 15 most similar compounds for {}'.format(c.id)))
diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py
index 0f39e8a3..80acd12e 100644
--- a/ippisite/ippidb/models.py
+++ b/ippisite/ippidb/models.py
@@ -749,29 +749,13 @@ class RefCompoundBiblio(models.Model):
     class Meta:
         unique_together = (('compound', 'bibliography'),)
 
-class DrugBankCompound(AutoFillableModel):
+class DrugBankCompound(models.Model):
     id = models.TextField(
         'Drugbank ID', unique=True, primary_key=True)
     common_name = models.TextField('Common name')
     canonical_smiles = models.TextField(
         'Canonical SMILES')
 
-    def autofill(self):
-        self.compute_compound_similarity()
-
-    def compute_compound_similarity(self):
-        """ compute Tanimoto similarity to existing compounds """
-        self.save()
-        fingerprinter = FingerPrinter("FP4")
-        #1. compute tanimoto for SMILES query vs all compounds
-        smiles_dict = {c.id:c.canonical_smile for c in Compound.objects.all()}
-        tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smiles, smiles_dict)
-        tanimoto_dict = dict(sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15])
-        dbcts = []
-        for id_, tanimoto in tanimoto_dict.items():
-            dbcts.append(DrugbankCompoundTanimoto(compound=Compound.objects.get(id=id_), drugbank_compound=self, tanimoto=tanimoto))
-        DrugbankCompoundTanimoto.objects.bulk_create(dbcts)
-
 class DrugbankCompoundTanimoto(models.Model):
     compound = models.ForeignKey(Compound, models.CASCADE)
     drugbank_compound = models.ForeignKey(DrugBankCompound, models.CASCADE)
-- 
GitLab