From 0a597cbbe53f15e5860203cc594519d6f0c8f6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr> Date: Tue, 20 Nov 2018 20:44:40 +0100 Subject: [PATCH] add Drugbank compound similarity computation Former-commit-id: 528aac3f93450a16ae1fb8cc56dda0cf0080afb2 --- ippisite/db.sqlite3.REMOVED.git-id | 2 +- .../management/commands/import_drugbank.py | 24 ++++++++++++++++-- .../0016_drugbankcompoundtanimoto.py | 25 +++++++++++++++++++ ippisite/ippidb/models.py | 6 +++++ 4 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py diff --git a/ippisite/db.sqlite3.REMOVED.git-id b/ippisite/db.sqlite3.REMOVED.git-id index 075f65e3..c765cbc7 100644 --- a/ippisite/db.sqlite3.REMOVED.git-id +++ b/ippisite/db.sqlite3.REMOVED.git-id @@ -1 +1 @@ -371b75bfeea17504694f84b9c6a2fe579e2249a1 \ No newline at end of file +2de38b2f569a1cecc7dd4e6dd360602d2f81e8a5 \ No newline at end of file diff --git a/ippisite/ippidb/management/commands/import_drugbank.py b/ippisite/ippidb/management/commands/import_drugbank.py index 8404e46b..2c1c2782 100644 --- a/ippisite/ippidb/management/commands/import_drugbank.py +++ b/ippisite/ippidb/management/commands/import_drugbank.py @@ -1,8 +1,12 @@ -from ippidb.models import DrugBankCompound +import operator + import pandas as pd from django.core.management import BaseCommand, CommandError +from ippidb.models import DrugBankCompound, DrugbankCompoundTanimoto, Compound +from ippidb.utils import FingerPrinter + class Command(BaseCommand): @@ -16,6 +20,7 @@ class Command(BaseCommand): ) def handle(self, *args, **options): + ''' df = pd.read_csv(options['path'],sep='\t') DrugBankCompound.objects.all().delete() self.stdout.write( @@ -37,4 +42,19 @@ class Command(BaseCommand): else: self.stdout.write( self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID']))) - + ''' + DrugbankCompoundTanimoto.objects.all().delete() + self.stdout.write( + self.style.SUCCESS('Successfully flushed DrugBank Compound Tanimoto table')) + smiles_dict = {c.id:c.canonical_smiles for c in DrugBankCompound.objects.all()} + for c in Compound.objects.all(): + fingerprinter = FingerPrinter("ECFP4") + #1. compute tanimoto for SMILES query vs all compounds + tanimoto_dict = fingerprinter.tanimoto_smiles(c.canonical_smile, smiles_dict) + tanimoto_dict = dict(sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]) + dbcts = [] + for id_, tanimoto in tanimoto_dict.items(): + dbcts.append(DrugbankCompoundTanimoto(compound=c, drugbank_compound=DrugBankCompound.objects.get(id=id_), tanimoto=tanimoto)) + DrugbankCompoundTanimoto.objects.bulk_create(dbcts) + self.stdout.write( + self.style.SUCCESS('Successfully inserted {} DrugBank Compound Tanimoto lines for Compound {}'.format(len(dbcts), c.id))) diff --git a/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py b/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py new file mode 100644 index 00000000..d86f908b --- /dev/null +++ b/ippisite/ippidb/migrations/0016_drugbankcompoundtanimoto.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2018-11-19 22:20 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('ippidb', '0015_drugbankcompound'), + ] + + operations = [ + migrations.CreateModel( + name='DrugbankCompoundTanimoto', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('tanimoto', models.DecimalField(decimal_places=4, max_digits=5, verbose_name='Tanimoto value')), + ('compound', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ippidb.Compound')), + ('drugbank_compound', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ippidb.DrugBankCompound')), + ], + ), + ] diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py index 7508ba7d..b716cc7c 100644 --- a/ippisite/ippidb/models.py +++ b/ippisite/ippidb/models.py @@ -736,3 +736,9 @@ class DrugBankCompound(models.Model): common_name = models.TextField('Common name') canonical_smiles = models.TextField( 'Canonical SMILES') + +class DrugbankCompoundTanimoto(models.Model): + compound = models.ForeignKey(Compound, models.CASCADE) + drugbank_compound = models.ForeignKey(DrugBankCompound, models.CASCADE) + tanimoto = models.DecimalField( + 'Tanimoto value', max_digits=5, decimal_places=4) \ No newline at end of file -- GitLab