Commit 6590d845 authored by Hervé MENAGER
Browse files

Set black arrows for the PCA correlation circle; each arrow's opacity (alpha) is set to its length

parent a3f8a79e
import json
import io
import base64
import itertools
import math
from django.core.management import BaseCommand
from django.forms.models import model_to_dict
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
......@@ -28,50 +31,6 @@ class Command(BaseCommand):
def handle(self, *args, **options):
self.stdout.write(self.style.SUCCESS("Generating the PCA biplot..."))
pca_data = []
features = [
"aromatic_ratio",
"balaban_index",
"fsp3",
"gc_molar_refractivity",
"log_d",
"a_log_p",
"mean_atom_vol_vdw",
"molecular_weight",
"nb_acceptor_h",
"nb_aliphatic_amines",
"nb_aromatic_bonds",
"nb_aromatic_ether",
"nb_aromatic_sssr",
"nb_atom",
"nb_atom_non_h",
"nb_benzene_like_rings",
"nb_bonds",
"nb_bonds_non_h",
"nb_br",
"nb_c",
"nb_chiral_centers",
"nb_circuits",
"nb_cl",
"nb_csp2",
"nb_csp3",
"nb_donor_h",
"nb_double_bonds",
"nb_f",
"nb_i",
"nb_multiple_bonds",
"nb_n",
"nb_o",
"nb_rings",
"nb_rotatable_bonds",
"randic_index",
"rdf070m",
"rotatable_bond_fraction",
"sum_atom_polar",
"sum_atom_vol_vdw",
"tpsa",
"ui",
"wiener_index",
]
features = [
"molecular_weight",
"a_log_p",
......@@ -90,7 +49,7 @@ class Command(BaseCommand):
"nb_multiple_bonds",
"nb_aromatic_bonds",
"aromatic_ratio",
] # MISSING: ArRing - Number of Aromatic ring
]
PcaBiplotData.objects.all().delete()
self.stdout.write(self.style.SUCCESS("Successfully flushed PCA biplot data"))
values_list = []
......@@ -123,13 +82,16 @@ class Command(BaseCommand):
pc_infos = pd.DataFrame(
coef, columns=cols, index=features
)
# we might remove the line below if the PCA remains grayscale
pal = itertools.cycle(sns.color_palette("dark", len(features)))
for idx in range(len(pc_infos["PC-0"])):
x = pc_infos["PC-0"][idx]
y = pc_infos["PC-1"][idx]
plt.plot([0.0, x], [0.0, y], "k-")
plt.plot(x, y, 'h', color='cadetblue')
plt.annotate(pc_infos.index[idx], xy=(x,y), xycoords='data',
xytext=np.asarray((x,y))+(0.02,-0.02))
color = "black"
alpha = math.sqrt(abs(x)*abs(x)+abs(y)*abs(y))
plt.arrow(0.0, 0.0, x, y, head_width=0.02, color="black", alpha=alpha)
plt.annotate(Compound._meta.get_field(pc_infos.index[idx]).verbose_name, xy=(x,y), xycoords='data',
xytext=np.asarray((x,y))+(0.02,-0.02), fontsize=6, color=color, alpha=alpha)
plt.xlabel("PC-0 (%s%%)" % str(variance_ratio[0])[:4].lstrip("0."))
plt.ylabel("PC-1 (%s%%)" % str(variance_ratio[1])[:4].lstrip("0."))
plt.xlim((-1, 1))
......
......@@ -18,6 +18,7 @@ bs4
scikit-learn==0.20.3 #later versions do not support Python<3.5
scipy==1.2.1 #Scipy >=1.3 does not support Python<3.5
matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4.
seaborn
# postgres driver
psycopg2-binary
git+https://gitlab.pasteur.fr/hmenager/django-diu.git#egg=django_diu
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment