Skip to content
Snippets Groups Projects
Commit 5ce2be2c authored by Hervé MENAGER
Browse files

generate correlation circle for the PCA (#49), WIP

Former-commit-id: 414f368d52e5d67dcbc3dbcb66cd2edf04ffeb4a
parent f8e055c0
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,8 @@ import json ...@@ -4,6 +4,8 @@ import json
from django.core.management import BaseCommand, CommandError from django.core.management import BaseCommand, CommandError
from django.forms.models import model_to_dict from django.forms.models import model_to_dict
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
...@@ -49,6 +51,28 @@ class Command(BaseCommand): ...@@ -49,6 +51,28 @@ class Command(BaseCommand):
x = StandardScaler().fit_transform(x) x = StandardScaler().fit_transform(x)
pca = PCA(n_components=2) pca = PCA(n_components=2)
principal_components = pca.fit_transform(x) principal_components = pca.fit_transform(x)
print(pca.explained_variance_)
print(pca.components_)
variance_ratio = pd.Series(pca.explained_variance_ratio_)
coef = np.transpose(pca.components_)
cols = ['PC-'+str(x) for x in range(len(variance_ratio))]
pc_infos = pd.DataFrame(coef, columns=cols, index=features) #pd.DataFrame(data=x).columns)
plt.Circle((0,0),radius=10, color='g', fill=False)
circle1 = plt.Circle((0,0),radius=1, color='g', fill=False)
fig = plt.gcf()
fig.gca().add_artist(circle1)
for idx in range(len(pc_infos["PC-0"])):
x = pc_infos["PC-0"][idx]
y = pc_infos["PC-1"][idx]
plt.plot([0.0,x],[0.0,y],'k-')
plt.plot(x, y, 'rx')
plt.annotate(pc_infos.index[idx], xy=(x,y))
plt.xlabel("PC-0 (%s%%)" % str(variance_ratio[0])[:4].lstrip("0."))
plt.ylabel("PC-1 (%s%%)" % str(variance_ratio[1])[:4].lstrip("0."))
plt.xlim((-1,1))
plt.ylim((-1,1))
plt.title("Circle of Correlations")
plt.savefig('foo2.png')
principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y']) principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y'])
final_df = pd.concat([principal_df, df[['family','id']]], axis = 1) final_df = pd.concat([principal_df, df[['family','id']]], axis = 1)
for index, row in final_df.iterrows(): for index, row in final_df.iterrows():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment