Skip to content
Snippets Groups Projects
Commit 5ce2be2c authored by Hervé MENAGER
Browse files

generate correlation circle for the PCA (#49), WIP

Former-commit-id: 414f368d52e5d67dcbc3dbcb66cd2edf04ffeb4a
parent f8e055c0
No related branches found
No related tags found
No related merge requests found
......@@ -4,6 +4,8 @@ import json
from django.core.management import BaseCommand, CommandError
from django.forms.models import model_to_dict
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
......@@ -49,6 +51,28 @@ class Command(BaseCommand):
x = StandardScaler().fit_transform(x)
pca = PCA(n_components=2)
principal_components = pca.fit_transform(x)
print(pca.explained_variance_)
print(pca.components_)
variance_ratio = pd.Series(pca.explained_variance_ratio_)
coef = np.transpose(pca.components_)
cols = ['PC-'+str(x) for x in range(len(variance_ratio))]
pc_infos = pd.DataFrame(coef, columns=cols, index=features) #pd.DataFrame(data=x).columns)
plt.Circle((0,0),radius=10, color='g', fill=False)
circle1 = plt.Circle((0,0),radius=1, color='g', fill=False)
fig = plt.gcf()
fig.gca().add_artist(circle1)
for idx in range(len(pc_infos["PC-0"])):
x = pc_infos["PC-0"][idx]
y = pc_infos["PC-1"][idx]
plt.plot([0.0,x],[0.0,y],'k-')
plt.plot(x, y, 'rx')
plt.annotate(pc_infos.index[idx], xy=(x,y))
plt.xlabel("PC-0 (%s%%)" % str(variance_ratio[0])[:4].lstrip("0."))
plt.ylabel("PC-1 (%s%%)" % str(variance_ratio[1])[:4].lstrip("0."))
plt.xlim((-1,1))
plt.ylim((-1,1))
plt.title("Circle of Correlations")
plt.savefig('foo2.png')
principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y'])
final_df = pd.concat([principal_df, df[['family','id']]], axis = 1)
for index, row in final_df.iterrows():
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment