# univariate.py
### Univariate tests ###
import numpy as np
import pandas as pd
# from sklearn.preprocessing import scale
from tools.preprocessing_tools import scale
from tqdm import tqdm
import tools.preprocessing_tools as pt
from tools.compute_manova import linear_regression, ttest
from plotly.subplots import make_subplots
import plotly.graph_objects as go
class UNIVARIATE:
    """Test every output column against the predictor(s), one test per output.

    Typical use: build the object, call :meth:`test` to fill ``beta`` and
    ``p``, then call :meth:`plot` to draw ``-log10(p)`` for each output next
    to a Bonferroni threshold.

    Attributes
    ----------
    outputs, cols_outputs
        Data and column labels returned by ``pt._extract_cols``. The data is
        then run through the optional ``L_preproc`` pipeline and ``scale``,
        and replaced by covariate-adjusted residuals when ``use_resid`` is
        true and covariates were supplied.
    predictors, cols_predictors
        Data and column labels returned by ``pt._extract_cols``; the data is
        then passed through ``scale``.
    covariates, cols_covariates
        Same treatment as the predictors, or the raw arguments (``None`` by
        default) when no covariates were supplied.
    use_resid
        Whether covariates are removed from the outputs up front (true) or
        handed to the regression in :meth:`test` (false).
    beta
        Result of ``linear_regression``; ``None`` until :meth:`test` runs.
    p
        Result of ``ttest``; ``None`` until :meth:`test` runs. :meth:`plot`
        treats it as one p-value per output column.
    """

    def __init__(self, outputs, predictors, covariates=None, cols_outputs=None,
                 cols_predictors=None, cols_covariates=None, L_preproc=None,
                 use_resid=True):
        """Extract, preprocess and scale the inputs.

        Parameters
        ----------
        outputs, predictors, covariates
            Input data in any form accepted by ``pt._extract_cols``
            (presumably NumPy arrays or pandas DataFrames - TODO confirm).
            ``covariates`` is optional.
        cols_outputs, cols_predictors, cols_covariates
            Optional column arguments forwarded to ``pt._extract_cols``
            together with the matching data.
        L_preproc
            Optional sequence of preprocessing steps forwarded to
            ``pt.pipeline`` as ``L_pipe``. ``None`` or an empty sequence
            skips the pipeline.
        use_resid
            When true and covariates are supplied, every output column is
            replaced by the result of ``pt.adjust_covariates``.
        """
        # Outputs: column extraction, optional pipeline, then scaling.
        self.outputs, self.cols_outputs = pt._extract_cols(outputs, cols_outputs)
        if L_preproc is not None and len(L_preproc) > 0:
            self.outputs = pt.pipeline(self.outputs, L_pipe=L_preproc)
        self.outputs = scale(self.outputs)

        # Predictors: column extraction, then scaling.
        self.predictors, self.cols_predictors = pt._extract_cols(predictors, cols_predictors)
        self.predictors = scale(self.predictors)

        self.covariates = covariates
        self.cols_covariates = cols_covariates
        self.use_resid = use_resid
        if covariates is not None:
            cov_matrix, self.cols_covariates = pt._extract_cols(covariates, cols_covariates)
            self.covariates = scale(cov_matrix)
            if use_resid:
                # Residualise whenever covariates were supplied. Gating on the
                # raw argument being an ndarray would skip other accepted input
                # types, and test() would then never use the covariates at all.
                # NOTE(review): assumes pt._extract_cols returns an array that
                # pt.adjust_covariates accepts - confirm against the helper.
                print("computing residuals")
                self.outputs = np.apply_along_axis(
                    lambda column: pt.adjust_covariates(column, cov_matrix),
                    axis=0,
                    arr=self.outputs,
                )

        # Filled in by test().
        self.p = None
        self.beta = None

    def _complete_rows(self):
        """Return a boolean mask of the rows whose predictor values are all non-NaN."""
        missing = np.atleast_1d(np.isnan(self.predictors))
        if missing.ndim > 1:
            # Collapse every non-row axis so the mask always has one entry per
            # row, whatever the number of predictor columns.
            missing = missing.any(axis=tuple(range(1, missing.ndim)))
        return ~missing

    def test(self, **kwargs):
        """Fit the regression and run the t-test, storing ``beta`` and ``p``.

        Rows with a NaN predictor value are left out. Covariates are passed
        to ``linear_regression`` only when they were supplied and
        ``use_resid`` is false; otherwise the regression gets outputs and
        predictors only. ``**kwargs`` is accepted but not used.
        """
        keep = self._complete_rows()
        # Boolean indexing builds a new array on every use, so each helper
        # call below receives its own copy of the filtered data.
        if self.covariates is not None and not self.use_resid:
            beta = linear_regression(self.outputs[keep], self.predictors[keep],
                                     self.covariates[keep])
        else:
            beta = linear_regression(self.outputs[keep], self.predictors[keep])
        self.beta = beta
        self.p = ttest(beta, self.predictors[keep], self.outputs[keep])

    def plot(self, show=True):
        """Plot ``-log10(p)`` per output with the Bonferroni threshold.

        Parameters
        ----------
        show
            When true the figure is displayed and ``None`` is returned;
            otherwise the figure is returned without being displayed.

        Raises
        ------
        RuntimeError
            If :meth:`test` has not been run yet, so there is nothing to plot.
        """
        if self.p is None:
            raise RuntimeError("No p-values to plot: call test() before plot().")

        n_tests = len(self.cols_outputs)
        # Bonferroni correction: 0.05 divided by the number of tested outputs.
        threshold = -np.log10(0.05 / n_tests)

        fig = make_subplots(rows=1, cols=1)
        fig.update_layout(title="Plot of all univariate tests")
        fig.add_trace(
            go.Scatter(x=self.cols_outputs, y=-np.log10(self.p),
                       name="Univariate tests", mode='markers'),
            row=1, col=1,
        )
        fig.add_trace(
            go.Scatter(x=self.cols_outputs, y=[threshold] * n_tests,
                       name='Bonferroni threshold'),
            row=1, col=1,
        )
        if show:
            fig.show()
            return None
        return fig