From 8d877cabececa4453da59d7ef982879fa4b6ed5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr>
Date: Thu, 3 May 2018 23:29:35 +0200
Subject: [PATCH] add PCA chart to compound card, WIP

This work implements #49 and includes code written
by @plaville.


Former-commit-id: 76daaabbc5c17f5e26f9c203c7ef54f6197d3386
---
 ippisite/db.sqlite3.REMOVED.git-id            |  2 +-
 ippisite/ippidb/management/commands/pca.py    | 55 +++++++++++++++++++
 .../migrations/0034_auto_20180503_2110.py     | 26 +++++++++
 ippisite/ippidb/models.py                     |  3 +
 ippisite/ippidb/templates/base.html           |  2 +-
 ippisite/ippidb/templates/compound_card.html  | 13 ++++-
 ippisite/ippidb/views.py                      |  7 ++-
 ippisite/requirements.txt                     |  2 +
 8 files changed, 104 insertions(+), 6 deletions(-)
 create mode 100644 ippisite/ippidb/management/commands/pca.py
 create mode 100644 ippisite/ippidb/migrations/0034_auto_20180503_2110.py

diff --git a/ippisite/db.sqlite3.REMOVED.git-id b/ippisite/db.sqlite3.REMOVED.git-id
index 0fc6bb90..0a4ddca2 100644
--- a/ippisite/db.sqlite3.REMOVED.git-id
+++ b/ippisite/db.sqlite3.REMOVED.git-id
@@ -1 +1 @@
-5fb9ed7a41b37ceaa343fd897c2db6c4123dfea8
\ No newline at end of file
+9bc693989d626dd0e41c7e205c09395148196f5f
\ No newline at end of file
diff --git a/ippisite/ippidb/management/commands/pca.py b/ippisite/ippidb/management/commands/pca.py
new file mode 100644
index 00000000..ffd86af4
--- /dev/null
+++ b/ippisite/ippidb/management/commands/pca.py
@@ -0,0 +1,55 @@
+import glob
+import json
+
+from django.core.management import BaseCommand, CommandError
+from django.forms.models import model_to_dict 
+import pandas as pd
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+
+from ippidb.models import Compound, PcaBiplotData
+
+class Command(BaseCommand):
+
+    help = "Generate the data for the compound LE vs LLE biplot"
+
+    def handle(self, *args, **options):
+        self.stdout.write(self.style.SUCCESS('Generating the PCA biplot...'))
+        pca_data = []
+        features = ['aromatic_ratio', 'balaban_index', 'fsp3', 
+                    'gc_molar_refractivity', 'log_d', 'a_log_p', 
+                    'mean_atom_vol_vdw', 'molecular_weight', 'nb_acceptor_h',
+                    'nb_aliphatic_amines', 'nb_aromatic_bonds', 
+                    'nb_aromatic_ether', 'nb_aromatic_sssr', 'nb_atom', 
+                    'nb_atom_non_h', 'nb_benzene_like_rings', 'nb_bonds', 
+                    'nb_bonds_non_h', 'nb_br', 'nb_c', 'nb_chiral_centers', 
+                    'nb_circuits', 'nb_cl', 'nb_csp2', 'nb_csp3', 'nb_donor_h',
+                    'nb_double_bonds', 'nb_f', 'nb_i', 'nb_multiple_bonds', 
+                    'nb_n', 'nb_o', 'nb_rings', 'nb_rotatable_bonds', 
+                    'randic_index', 'rdf070m', 'rotatable_bond_fraction', 
+                    'sum_atom_polar', 'sum_atom_vol_vdw', 'tpsa', 'ui', 
+                    'wiener_index']
+        PcaBiplotData.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS('Successfully flushed PCA biplot data'))
+        l = []
+        for comp in Compound.objects.all():
+            values = model_to_dict(comp, fields=features + ['id','family'])
+            values['family'] = comp.best_pXC50_activity_ppi_family
+            l.append(values)
+        df = pd.DataFrame(l)
+        x = df.loc[:, features].values 
+        y = df.loc[:,['family']].values
+        x = StandardScaler().fit_transform(x)
+        pca = PCA(n_components=2)
+        principal_components = pca.fit_transform(x)
+        principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y'])
+        final_df = pd.concat([principal_df, df[['family','id']]], axis = 1)
+        for index, row in final_df.iterrows():
+            pca_data.append({'x': row.x, 'y': row.y, 'id': row.id, 'family_name': row.family})
+        pca_json = json.dumps(pca_data, separators=(',',':'))
+        new = PcaBiplotData()
+        new.pca_biplot_data = pca_json
+        new.save()
+        self.stdout.write(
+            self.style.SUCCESS('Successfully generated PCA biplot data'))
diff --git a/ippisite/ippidb/migrations/0034_auto_20180503_2110.py b/ippisite/ippidb/migrations/0034_auto_20180503_2110.py
new file mode 100644
index 00000000..5ed85ffc
--- /dev/null
+++ b/ippisite/ippidb/migrations/0034_auto_20180503_2110.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2018-05-03 21:10
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Create the PcaBiplotData model and set the plural verbose name of PpiFamily."""
+    dependencies = [
+        ('ippidb', '0033_auto_20180502_1500'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='PcaBiplotData',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('pca_biplot_data', models.CharField(blank=True, max_length=150000, null=True, verbose_name='PCA biplot JSON data')),
+            ],
+        ),
+        migrations.AlterModelOptions(
+            name='ppifamily',
+            options={'verbose_name_plural': 'PPI Families'},
+        ),
+    ]
diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py
index 2a14d8ca..7d2276d2 100644
--- a/ippisite/ippidb/models.py
+++ b/ippisite/ippidb/models.py
@@ -493,6 +493,9 @@ class Compound(models.Model):
         return list(set([ca.ppi.family for ca in self.compoundaction_set.all()]))
 
 
+class PcaBiplotData(models.Model):  # stores the PCA biplot JSON built by the "pca" management command
+    pca_biplot_data = models.CharField('PCA biplot JSON data', max_length=150000, blank=True, null=True)
+
 class LeLleBiplotData(models.Model):
     le_lle_biplot_data = models.CharField('LE-LLE biplot JSON data', max_length=150000, blank=True, null=True)
 
diff --git a/ippisite/ippidb/templates/base.html b/ippisite/ippidb/templates/base.html
index 2658a79c..5e37787e 100644
--- a/ippisite/ippidb/templates/base.html
+++ b/ippisite/ippidb/templates/base.html
@@ -93,7 +93,7 @@
                                         data: radarChartData,
                                         options: { scaleLineWidth : 1, pointLabelFontFamily : "'Helvetica Neue'", pointLabelFontSize : 12, scaleOverride : true, scaleSteps : 5, scaleStepWidth : 0.2}});
             };
-            var drawLeLleBiplotChart = function(canvasId, compoundId, compoundFamily, plotData, tabHash){
+            var drawCompoundsBiplotChart = function(canvasId, compoundId, compoundFamily, plotData, tabHash){
                 var currentCompoundData = [];
                 var currentFamilyData = [];
                 var otherFamiliesData = [];
diff --git a/ippisite/ippidb/templates/compound_card.html b/ippisite/ippidb/templates/compound_card.html
index e3ecb6d5..4f19cd73 100644
--- a/ippisite/ippidb/templates/compound_card.html
+++ b/ippisite/ippidb/templates/compound_card.html
@@ -158,6 +158,17 @@
             </div>
             </div>
             </div>
+            <div class="card col-sm-12 col-md-6">
+              <h5 class="card-header">PCA : iPPI-DB chemical space</h5>
+              <div class="card-body">
+
+              <canvas id="pca_biplot"></canvas>
+              <script>
+                drawCompoundsBiplotChart('pca_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ pca_biplot_data | safe }}, 'physicochemistry');
+              </script>
+            </div>
+            </div>
+            </div>
           </div>
           <div class="tab-pane fade" id="pharmacology" role="tabpanel" aria-labelledby="pharmacology-tab">
              <div class="card col-sm-12 col-md-12">
@@ -165,7 +176,7 @@
               <div class="card-body">
               <canvas id="le_lle_biplot"></canvas>
               <script>
-                drawLeLleBiplotChart('le_lle_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ biplot_data | safe }}, 'pharmacology');
+                drawCompoundsBiplotChart('le_lle_biplot', {{ compound.id }}, '{{ compound.best_pXC50_activity_ppi_family|default_if_none:"No target family identified" }}', {{ le_lle_biplot_data | safe }}, 'pharmacology');
               </script>
             </div>
             </div>
diff --git a/ippisite/ippidb/views.py b/ippisite/ippidb/views.py
index a8cabb85..42c4b1f1 100644
--- a/ippisite/ippidb/views.py
+++ b/ippisite/ippidb/views.py
@@ -5,7 +5,7 @@ from django.http import HttpResponseRedirect, Http404
 from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
 from formtools.wizard.views import SessionWizardView, NamedUrlSessionWizardView
 from .forms import IdForm, BibliographyForm, PDBForm, ProteinForm, ComplexCompositionForm, ComplexCompositionFormSet, ProteinDomainComplexTypeForm, ProteinDomainComplexForm, PpiForm, PpiComplexForm, PpiAndComplexForm, ProteinFormSet,TestsForm, CompoundForm, CompoundFormSet
-from .models import Protein, Bibliography, ProteinDomainComplex, ProteinDomainBoundComplex, RefCompoundBiblio, TestActivityDescription, Compound, Ppi, Disease, Taxonomy, LeLleBiplotData
+from .models import Protein, Bibliography, ProteinDomainComplex, ProteinDomainBoundComplex, RefCompoundBiblio, TestActivityDescription, Compound, Ppi, Disease, Taxonomy, LeLleBiplotData, PcaBiplotData
 from .ws import get_pdb_uniprot_mapping
 
 
@@ -237,5 +237,6 @@ def compound_card(request, compound_id):
         compound = Compound.objects.get(id=int(compound_id))
     except Compound.DoesNotExist:
         raise Http404("No compound data for %s:%s" % (compound_id))
-    biplot_data = LeLleBiplotData.objects.get().le_lle_biplot_data 
-    return render(request, 'compound_card.html', {'compound': compound, 'biplot_data': biplot_data}) 
+    le_lle_biplot_data = LeLleBiplotData.objects.get().le_lle_biplot_data 
+    pca_biplot_data = PcaBiplotData.objects.get().pca_biplot_data 
+    return render(request, 'compound_card.html', {'compound': compound, 'le_lle_biplot_data': le_lle_biplot_data, 'pca_biplot_data': pca_biplot_data}) 
diff --git a/ippisite/requirements.txt b/ippisite/requirements.txt
index 946f211d..1c592c35 100644
--- a/ippisite/requirements.txt
+++ b/ippisite/requirements.txt
@@ -12,3 +12,5 @@ django-debug-toolbar
 mod_wsgi
 bioblend
 django-allauth
+scikit-learn
+scipy
-- 
GitLab