From 8d877cabececa4453da59d7ef982879fa4b6ed5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr> Date: Thu, 3 May 2018 23:29:35 +0200 Subject: [PATCH] add PCA chart to compound card, WIP this work is an implementation of #49, and includes code written by @plaville. Former-commit-id: 76daaabbc5c17f5e26f9c203c7ef54f6197d3386 --- ippisite/db.sqlite3.REMOVED.git-id | 2 +- ippisite/ippidb/management/commands/pca.py | 55 +++++++++++++++++++ .../migrations/0034_auto_20180503_2110.py | 26 +++++++++ ippisite/ippidb/models.py | 3 + ippisite/ippidb/templates/base.html | 2 +- ippisite/ippidb/templates/compound_card.html | 13 ++++- ippisite/ippidb/views.py | 7 ++- ippisite/requirements.txt | 2 + 8 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 ippisite/ippidb/management/commands/pca.py create mode 100644 ippisite/ippidb/migrations/0034_auto_20180503_2110.py diff --git a/ippisite/db.sqlite3.REMOVED.git-id b/ippisite/db.sqlite3.REMOVED.git-id index 0fc6bb90..0a4ddca2 100644 --- a/ippisite/db.sqlite3.REMOVED.git-id +++ b/ippisite/db.sqlite3.REMOVED.git-id @@ -1 +1 @@ -5fb9ed7a41b37ceaa343fd897c2db6c4123dfea8 \ No newline at end of file +9bc693989d626dd0e41c7e205c09395148196f5f \ No newline at end of file diff --git a/ippisite/ippidb/management/commands/pca.py b/ippisite/ippidb/management/commands/pca.py new file mode 100644 index 00000000..ffd86af4 --- /dev/null +++ b/ippisite/ippidb/management/commands/pca.py @@ -0,0 +1,55 @@ +import glob +import json + +from django.core.management import BaseCommand, CommandError +from django.forms.models import model_to_dict +import pandas as pd +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler + +from ippidb.models import Compound, PcaBiplotData + +class Command(BaseCommand): + + help = "Generate the data for the compound PCA biplot" + + def handle(self, *args, **options): + self.stdout.write(self.style.SUCCESS('Generating the PCA 
biplot...')) + pca_data = [] + features = ['aromatic_ratio', 'balaban_index', 'fsp3', + 'gc_molar_refractivity', 'log_d', 'a_log_p', + 'mean_atom_vol_vdw', 'molecular_weight', 'nb_acceptor_h', + 'nb_aliphatic_amines', 'nb_aromatic_bonds', + 'nb_aromatic_ether', 'nb_aromatic_sssr', 'nb_atom', + 'nb_atom_non_h', 'nb_benzene_like_rings', 'nb_bonds', + 'nb_bonds_non_h', 'nb_br', 'nb_c', 'nb_chiral_centers', + 'nb_circuits', 'nb_cl', 'nb_csp2', 'nb_csp3', 'nb_donor_h', + 'nb_double_bonds', 'nb_f', 'nb_i', 'nb_multiple_bonds', + 'nb_n', 'nb_o', 'nb_rings', 'nb_rotatable_bonds', + 'randic_index', 'rdf070m', 'rotatable_bond_fraction', + 'sum_atom_polar', 'sum_atom_vol_vdw', 'tpsa', 'ui', + 'wiener_index'] + PcaBiplotData.objects.all().delete() + self.stdout.write( + self.style.SUCCESS('Successfully flushed PCA biplot data')) + l = [] + for comp in Compound.objects.all(): + values = model_to_dict(comp, fields=features + ['id','family']) + values['family'] = comp.best_pXC50_activity_ppi_family + l.append(values) + df = pd.DataFrame(l) + x = df.loc[:, features].values + y = df.loc[:,['family']].values + x = StandardScaler().fit_transform(x) + pca = PCA(n_components=2) + principal_components = pca.fit_transform(x) + principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y']) + final_df = pd.concat([principal_df, df[['family','id']]], axis = 1) + for index, row in final_df.iterrows(): + pca_data.append({'x': row.x, 'y': row.y, 'id': row.id, 'family_name': row.family}) + pca_json = json.dumps(pca_data, separators=(',',':')) + new = PcaBiplotData() + new.pca_biplot_data = pca_json + new.save() + self.stdout.write( + self.style.SUCCESS('Successfully generated PCA biplot data')) diff --git a/ippisite/ippidb/migrations/0034_auto_20180503_2110.py b/ippisite/ippidb/migrations/0034_auto_20180503_2110.py new file mode 100644 index 00000000..5ed85ffc --- /dev/null +++ b/ippisite/ippidb/migrations/0034_auto_20180503_2110.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# 
Generated by Django 1.11 on 2018-05-03 21:10 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('ippidb', '0033_auto_20180502_1500'), + ] + + operations = [ + migrations.CreateModel( + name='PcaBiplotData', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('pca_biplot_data', models.CharField(blank=True, max_length=150000, null=True, verbose_name='PCA biplot JSON data')), + ], + ), + migrations.AlterModelOptions( + name='ppifamily', + options={'verbose_name_plural': 'PPI Families'}, + ), + ] diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py index 2a14d8ca..7d2276d2 100644 --- a/ippisite/ippidb/models.py +++ b/ippisite/ippidb/models.py @@ -493,6 +493,9 @@ class Compound(models.Model): return list(set([ca.ppi.family for ca in self.compoundaction_set.all()])) +class PcaBiplotData(models.Model): + pca_biplot_data = models.CharField('PCA biplot JSON data', max_length=150000, blank=True, null=True) + class LeLleBiplotData(models.Model): le_lle_biplot_data = models.CharField('LE-LLE biplot JSON data', max_length=150000, blank=True, null=True) diff --git a/ippisite/ippidb/templates/base.html b/ippisite/ippidb/templates/base.html index 2658a79c..5e37787e 100644 --- a/ippisite/ippidb/templates/base.html +++ b/ippisite/ippidb/templates/base.html @@ -93,7 +93,7 @@ data: radarChartData, options: { scaleLineWidth : 1, pointLabelFontFamily : "'Helvetica Neue'", pointLabelFontSize : 12, scaleOverride : true, scaleSteps : 5, scaleStepWidth : 0.2}}); }; - var drawLeLleBiplotChart = function(canvasId, compoundId, compoundFamily, plotData, tabHash){ + var drawCompoundsBiplotChart = function(canvasId, compoundId, compoundFamily, plotData, tabHash){ var currentCompoundData = []; var currentFamilyData = []; var otherFamiliesData = []; diff --git a/ippisite/ippidb/templates/compound_card.html 
b/ippisite/ippidb/templates/compound_card.html index e3ecb6d5..4f19cd73 100644 --- a/ippisite/ippidb/templates/compound_card.html +++ b/ippisite/ippidb/templates/compound_card.html @@ -158,6 +158,17 @@ </div> </div> </div> + <div class="card col-sm-12 col-md-6"> + <h5 class="card-header">PCA : iPPI-DB chemical space</h5> + <div class="card-body"> + + <canvas id="pca_biplot"></canvas> + <script> + drawCompoundsBiplotChart('pca_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ pca_biplot_data | safe }}, 'physicochemistry'); + </script> + </div> + </div> + </div> </div> <div class="tab-pane fade" id="pharmacology" role="tabpanel" aria-labelledby="pharmacology-tab"> <div class="card col-sm-12 col-md-12"> @@ -165,7 +176,7 @@ <div class="card-body"> <canvas id="le_lle_biplot"></canvas> <script> - drawLeLleBiplotChart('le_lle_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ biplot_data | safe }}, 'pharmacology'); + drawCompoundsBiplotChart('le_lle_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ le_lle_biplot_data | safe }}, 'pharmacology'); </script> </div> </div> diff --git a/ippisite/ippidb/views.py b/ippisite/ippidb/views.py index a8cabb85..42c4b1f1 100644 --- a/ippisite/ippidb/views.py +++ b/ippisite/ippidb/views.py @@ -5,7 +5,7 @@ from django.http import HttpResponseRedirect, Http404 from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger from formtools.wizard.views import SessionWizardView, NamedUrlSessionWizardView from .forms import IdForm, BibliographyForm, PDBForm, ProteinForm, ComplexCompositionForm, ComplexCompositionFormSet, ProteinDomainComplexTypeForm, ProteinDomainComplexForm, PpiForm, PpiComplexForm, PpiAndComplexForm, ProteinFormSet,TestsForm, CompoundForm, CompoundFormSet -from .models import Protein, 
Bibliography, ProteinDomainComplex, ProteinDomainBoundComplex, RefCompoundBiblio, TestActivityDescription, Compound, Ppi, Disease, Taxonomy, LeLleBiplotData +from .models import Protein, Bibliography, ProteinDomainComplex, ProteinDomainBoundComplex, RefCompoundBiblio, TestActivityDescription, Compound, Ppi, Disease, Taxonomy, LeLleBiplotData, PcaBiplotData from .ws import get_pdb_uniprot_mapping @@ -237,5 +237,6 @@ def compound_card(request, compound_id): compound = Compound.objects.get(id=int(compound_id)) except Compound.DoesNotExist: raise Http404("No compound data for %s:%s" % (compound_id)) - biplot_data = LeLleBiplotData.objects.get().le_lle_biplot_data - return render(request, 'compound_card.html', {'compound': compound, 'biplot_data': biplot_data}) + le_lle_biplot_data = LeLleBiplotData.objects.get().le_lle_biplot_data + pca_biplot_data = PcaBiplotData.objects.get().pca_biplot_data + return render(request, 'compound_card.html', {'compound': compound, 'le_lle_biplot_data': le_lle_biplot_data, 'pca_biplot_data': pca_biplot_data}) diff --git a/ippisite/requirements.txt b/ippisite/requirements.txt index 946f211d..1c592c35 100644 --- a/ippisite/requirements.txt +++ b/ippisite/requirements.txt @@ -12,3 +12,5 @@ django-debug-toolbar mod_wsgi bioblend django-allauth +scikit-learn +scipy -- GitLab