From 0a597cbbe53f15e5860203cc594519d6f0c8f6f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr>
Date: Tue, 20 Nov 2018 20:44:40 +0100
Subject: [PATCH] add Drugbank compound similarity computation

Former-commit-id: 528aac3f93450a16ae1fb8cc56dda0cf0080afb2
---
 ippisite/db.sqlite3.REMOVED.git-id            |  2 +-
 .../management/commands/import_drugbank.py    | 24 ++++++++++++++++--
 .../0016_drugbankcompoundtanimoto.py          | 25 +++++++++++++++++++
 ippisite/ippidb/models.py                     |  6 +++++
 4 files changed, 54 insertions(+), 3 deletions(-)
 create mode 100644 ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py

diff --git a/ippisite/db.sqlite3.REMOVED.git-id b/ippisite/db.sqlite3.REMOVED.git-id
index 075f65e3..c765cbc7 100644
--- a/ippisite/db.sqlite3.REMOVED.git-id
+++ b/ippisite/db.sqlite3.REMOVED.git-id
@@ -1 +1 @@
-371b75bfeea17504694f84b9c6a2fe579e2249a1
\ No newline at end of file
+2de38b2f569a1cecc7dd4e6dd360602d2f81e8a5
\ No newline at end of file
diff --git a/ippisite/ippidb/management/commands/import_drugbank.py b/ippisite/ippidb/management/commands/import_drugbank.py
index 8404e46b..2c1c2782 100644
--- a/ippisite/ippidb/management/commands/import_drugbank.py
+++ b/ippisite/ippidb/management/commands/import_drugbank.py
@@ -1,8 +1,12 @@
-from ippidb.models import DrugBankCompound
+import operator
+
 import pandas as pd
 
 from django.core.management import BaseCommand, CommandError
 
+from ippidb.models import DrugBankCompound, DrugbankCompoundTanimoto, Compound
+from ippidb.utils import FingerPrinter
+
 
 class Command(BaseCommand):
 
@@ -16,6 +20,7 @@ class Command(BaseCommand):
         )
 
     def handle(self, *args, **options):
+        '''
         df = pd.read_csv(options['path'],sep='\t')
         DrugBankCompound.objects.all().delete()
         self.stdout.write(
@@ -37,4 +42,19 @@ class Command(BaseCommand):
             else:
                 self.stdout.write(
                     self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID'])))
-        
+        '''
+        DrugbankCompoundTanimoto.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS('Successfully flushed DrugBank Compound Tanimoto table'))
+        smiles_dict = dict(DrugBankCompound.objects.values_list('id', 'canonical_smiles'))
+        for c in Compound.objects.all():
+            fingerprinter = FingerPrinter("ECFP4")
+            # 1. compute tanimoto for this compound's SMILES vs all DrugBank compounds
+            tanimoto_dict = fingerprinter.tanimoto_smiles(c.canonical_smile, smiles_dict)
+            # 2. keep only the 15 highest-scoring DrugBank compounds, best first
+            top_hits = sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
+            # 3. ids come from smiles_dict, so set the FK column directly instead of one SELECT per row
+            dbcts = [DrugbankCompoundTanimoto(compound=c, drugbank_compound_id=id_, tanimoto=tanimoto) for id_, tanimoto in top_hits]
+            DrugbankCompoundTanimoto.objects.bulk_create(dbcts)
+            self.stdout.write(
+                self.style.SUCCESS('Successfully inserted {} DrugBank Compound Tanimoto lines for Compound {}'.format(len(dbcts), c.id)))
diff --git a/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py b/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py
new file mode 100644
index 00000000..d86f908b
--- /dev/null
+++ b/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2018-11-19 22:20
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):  # creates the DrugbankCompoundTanimoto model
+
+    dependencies = [
+        ('ippidb', '0015_drugbankcompound'),  # must be applied before this migration
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DrugbankCompoundTanimoto',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('tanimoto', models.DecimalField(decimal_places=4, max_digits=5, verbose_name='Tanimoto value')),  # 5 digits in total, 4 of them after the decimal point
+                ('compound', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ippidb.Compound')),  # row is deleted together with its Compound
+                ('drugbank_compound', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ippidb.DrugBankCompound')),  # row is deleted together with its DrugBankCompound
+            ],
+        ),
+    ]
diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py
index 7508ba7d..b716cc7c 100644
--- a/ippisite/ippidb/models.py
+++ b/ippisite/ippidb/models.py
@@ -736,3 +736,9 @@ class DrugBankCompound(models.Model):
     common_name = models.TextField('Common name')
     canonical_smiles = models.TextField(
         'Canonical SMILES')
+
+class DrugbankCompoundTanimoto(models.Model):  # Tanimoto value stored for one (Compound, DrugBankCompound) pair
+    compound = models.ForeignKey(Compound, models.CASCADE)  # row is deleted together with its Compound
+    drugbank_compound = models.ForeignKey(DrugBankCompound, models.CASCADE)  # row is deleted together with its DrugBankCompound
+    tanimoto = models.DecimalField(  # 5 digits in total, 4 of them after the decimal point
+        'Tanimoto value', max_digits=5, decimal_places=4)
\ No newline at end of file
-- 
GitLab