From 5ce2be2cf44c15f4733a01f9252af20abd90ec78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herv=C3=A9=20=20MENAGER?= <herve.menager@pasteur.fr>
Date: Sun, 6 May 2018 20:40:49 +0200
Subject: [PATCH] generate correlation circle for the PCA (#49), WIP

Former-commit-id: 414f368d52e5d67dcbc3dbcb66cd2edf04ffeb4a
---
 ippisite/ippidb/management/commands/pca.py | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/ippisite/ippidb/management/commands/pca.py b/ippisite/ippidb/management/commands/pca.py
index f752538e..c64f4680 100644
--- a/ippisite/ippidb/management/commands/pca.py
+++ b/ippisite/ippidb/management/commands/pca.py
@@ -4,6 +4,8 @@ import json
 from django.core.management import BaseCommand, CommandError
 from django.forms.models import model_to_dict 
 import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
@@ -49,6 +51,28 @@ class Command(BaseCommand):
         x = StandardScaler().fit_transform(x)
         pca = PCA(n_components=2)
         principal_components = pca.fit_transform(x)
+        print(pca.explained_variance_)
+        print(pca.components_)
+        variance_ratio = pd.Series(pca.explained_variance_ratio_)
+        coef = np.transpose(pca.components_)
+        cols = ['PC-'+str(x) for x in range(len(variance_ratio))]
+        pc_infos = pd.DataFrame(coef, columns=cols, index=features) #pd.DataFrame(data=x).columns)
+        plt.Circle((0,0),radius=10, color='g', fill=False)
+        circle1 = plt.Circle((0,0),radius=1, color='g', fill=False)
+        fig = plt.gcf()
+        fig.gca().add_artist(circle1)
+        for idx in range(len(pc_infos["PC-0"])):
+            x = pc_infos["PC-0"][idx]
+            y = pc_infos["PC-1"][idx]
+            plt.plot([0.0,x],[0.0,y],'k-')
+            plt.plot(x, y, 'rx')
+            plt.annotate(pc_infos.index[idx], xy=(x,y))
+        plt.xlabel("PC-0 (%s%%)" % str(variance_ratio[0])[:4].lstrip("0."))
+        plt.ylabel("PC-1 (%s%%)" % str(variance_ratio[1])[:4].lstrip("0."))
+        plt.xlim((-1,1))
+        plt.ylim((-1,1))
+        plt.title("Circle of Correlations") 
+        plt.savefig('foo2.png')
         principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y'])
         final_df = pd.concat([principal_df, df[['family','id']]], axis = 1)
         for index, row in final_df.iterrows():
-- 
GitLab