diff --git a/ippisite/ippidb/management/commands/pca.py b/ippisite/ippidb/management/commands/pca.py index f752538ec1d975631efb1fa98cfa9c58040c4b15..c64f46800412f49a3db67018a9b28ccda924327b 100644 --- a/ippisite/ippidb/management/commands/pca.py +++ b/ippisite/ippidb/management/commands/pca.py @@ -4,6 +4,8 @@ import json from django.core.management import BaseCommand, CommandError from django.forms.models import model_to_dict import pandas as pd +import matplotlib.pyplot as plt +import numpy as np from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler @@ -49,6 +51,28 @@ class Command(BaseCommand): x = StandardScaler().fit_transform(x) pca = PCA(n_components=2) principal_components = pca.fit_transform(x) + print(pca.explained_variance_) + print(pca.components_) + variance_ratio = pd.Series(pca.explained_variance_ratio_) + coef = np.transpose(pca.components_) + cols = ['PC-'+str(x) for x in range(len(variance_ratio))] + pc_infos = pd.DataFrame(coef, columns=cols, index=features) #pd.DataFrame(data=x).columns) + plt.Circle((0,0),radius=10, color='g', fill=False) + circle1 = plt.Circle((0,0),radius=1, color='g', fill=False) + fig = plt.gcf() + fig.gca().add_artist(circle1) + for idx in range(len(pc_infos["PC-0"])): + x = pc_infos["PC-0"][idx] + y = pc_infos["PC-1"][idx] + plt.plot([0.0,x],[0.0,y],'k-') + plt.plot(x, y, 'rx') + plt.annotate(pc_infos.index[idx], xy=(x,y)) + plt.xlabel("PC-0 (%s%%)" % str(variance_ratio[0])[:4].lstrip("0.")) + plt.ylabel("PC-1 (%s%%)" % str(variance_ratio[1])[:4].lstrip("0.")) + plt.xlim((-1,1)) + plt.ylim((-1,1)) + plt.title("Circle of Correlations") + plt.savefig('foo2.png') principal_df = pd.DataFrame(data = principal_components , columns = ['x', 'y']) final_df = pd.concat([principal_df, df[['family','id']]], axis = 1) for index, row in final_df.iterrows():