From cd6077093130e7b0bb43ea662a76c12916ac1c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr> Date: Sun, 25 Nov 2018 19:42:35 +0100 Subject: [PATCH] fix drugbank similarities computation see #96 Former-commit-id: 7f0a26180c2fb18638e0293c5ecbc123b4a9271f --- .../management/commands/import_drugbank.py | 9 +++++++-- ippisite/ippidb/models.py | 18 +----------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/ippisite/ippidb/management/commands/import_drugbank.py b/ippisite/ippidb/management/commands/import_drugbank.py index 7d024e49..bb470cff 100644 --- a/ippisite/ippidb/management/commands/import_drugbank.py +++ b/ippisite/ippidb/management/commands/import_drugbank.py @@ -25,12 +25,13 @@ class Command(BaseCommand): self.stdout.write( self.style.SUCCESS('Successfully flushed DrugBank Compound table')) for index, row in df.iterrows(): + # insert all drugbank compounds in the DB try: dbc = DrugBankCompound() dbc.id = row.loc['DRUGBANK_ID'] dbc.common_name = row.loc['COMMON_NAME'] dbc.canonical_smiles = row.loc['CanSmile'] - dbc.save(autofill=True) + dbc.save() except Exception: self.stdout.write( self.style.ERROR('Failed inserting {}'.format(row.loc['DRUGBANK_ID']))) @@ -40,4 +41,8 @@ class Command(BaseCommand): 'Failed inserting {}'.format(row.loc['DRUGBANK_ID'])) else: self.stdout.write( - self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID']))) \ No newline at end of file + self.style.SUCCESS('Successfully inserted {}'.format(row.loc['DRUGBANK_ID']))) + for c in Compound.objects.all(): + # for each iPPI-DB compound compute the most similar drugbank compounds + c.save(autofill=True) + self.stdout.write(self.style.SUCCESS('Successfully computed 15 most similar compounds for {}'.format(c.id))) diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py index 0f39e8a3..80acd12e 100644 --- a/ippisite/ippidb/models.py +++ b/ippisite/ippidb/models.py @@ -749,29 +749,13 @@ class RefCompoundBiblio(models.Model): class Meta: unique_together = (('compound', 'bibliography'),) -class DrugBankCompound(AutoFillableModel): +class DrugBankCompound(models.Model): id = models.TextField( 'Drugbank ID', unique=True, primary_key=True) common_name = models.TextField('Common name') canonical_smiles = models.TextField( 'Canonical SMILES') - def autofill(self): - self.compute_compound_similarity() - - def compute_compound_similarity(self): - """ compute Tanimoto similarity to existing compounds """ - self.save() - fingerprinter = FingerPrinter("FP4") - #1. compute tanimoto for SMILES query vs all compounds - smiles_dict = {c.id:c.canonical_smile for c in Compound.objects.all()} - tanimoto_dict = fingerprinter.tanimoto_smiles(self.canonical_smiles, smiles_dict) - tanimoto_dict = dict(sorted(tanimoto_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]) - dbcts = [] - for id_, tanimoto in tanimoto_dict.items(): - dbcts.append(DrugbankCompoundTanimoto(compound=Compound.objects.get(id=id_), drugbank_compound=self, tanimoto=tanimoto)) - DrugbankCompoundTanimoto.objects.bulk_create(dbcts) - class DrugbankCompoundTanimoto(models.Model): compound = models.ForeignKey(Compound, models.CASCADE) drugbank_compound = models.ForeignKey(DrugBankCompound, models.CASCADE) -- GitLab