diff --git a/python/src/manocca.py b/python/src/manocca.py
index d32567c8e14c86d2c649f0db79ae9997736edef0..54028ff3abbefb06c91f201406892e5546868139 100644
--- a/python/src/manocca.py
+++ b/python/src/manocca.py
@@ -110,7 +110,7 @@ class MANOCCA:
     def __init__(self, predictors, outputs, covariates=None, cols_outputs = None,
                  cols_predictors = None, cols_covariates = None, prodV_red=None, n_comp = None,
                  prod_to_keep = None, use_resid = True, use_pca = True,
-                 adj_pred = False, use_extended = False, corr_bias = False, n_jobs = 1, verbose = 1):
+                 adj_pred = False, use_extended = False, corr_bias = False, n_jobs = 1, apply_qt = True, return_fisher = False, verbose = 1):
 
         ### Initializing
         self.outputs = outputs
@@ -143,15 +143,21 @@ class MANOCCA:
         self.use_pca = use_pca
         self.use_extended = use_extended
         self.adj_pred = adj_pred
+        self.apply_qt = apply_qt
+        self.return_fisher = return_fisher
 
         self.bias = None
         self.corr_bias = corr_bias
+        if corr_bias : # if we want to use the correction, we can't use the qt transformation
+            self.apply_qt = False
+
         # Filled later
         self.prodV = None
         self.prodV_red = None
         self.pca = None
         self.p = None
+        self.fisher = None
 
         # Filtering product columns
         self.prod_to_keep = prod_to_keep
@@ -184,9 +190,9 @@ class MANOCCA:
                 print(self.prodV.shape)
         else :
             if self.corr_bias:
-                self.prodV = self.get_prodV_para_wrap(self.outputs, apply_qt=False)
+                self.prodV = self.get_prodV_para_wrap(self.outputs, apply_qt=False) # always False for corr_bias, as qt can bring the bias back
             else:
-                self.prodV = self.get_prodV_para_wrap(self.outputs, apply_qt=True)
+                self.prodV = self.get_prodV_para_wrap(self.outputs, apply_qt=self.apply_qt)
             if self.verbose>0 :
                 print(self.prodV.shape)
         if not isinstance(self.prod_to_keep, type(None)): # Filtering out some columns
@@ -233,12 +239,12 @@ class MANOCCA:
             pred = self.predictors[:,i_pred]
             beta = np.apply_along_axis(lambda x : linear_regression(Y_tmp, x.reshape(-1,1)).flatten(), axis = 0, arr = pred)
             beta = beta.reshape(-1,1)
-            prod_beta = self.get_prodV_para_wrap(beta.T, apply_qt = False, job_overide = True)
+            prod_beta = self.get_prodV_para_wrap(beta.T, apply_qt = self.apply_qt, job_overide = True)
             self.prod_beta = prod_beta
             self.bias = prod_beta*(pred*pred).reshape(-1,1)
 
-    def get_prodV_para_effi(self, DD0, job_id, nb_compute, apply_qt = True):
+    def get_prodV_para_effi(self, DD0, job_id, nb_compute, apply_qt):
         """ Computes the product matrix for a set of outputs and a given number of jobs
@@ -264,14 +270,13 @@ class MANOCCA:
             tmp = (DD0[:,(i+1):]*DD0[:,i].reshape(-1,1))
             # std = tmp.std(axis = 0)
             if apply_qt:
-                # print('coucou')
                 tmp = pt.get_qt(tmp)
             # tmp = (std) * tmp
             L_prodV += [tmp]
         return L_prodV
 
-    def get_prodV_para_wrap(self, DD0, apply_qt = True, job_overide = False ) :
+    def get_prodV_para_wrap(self, DD0, apply_qt, job_overide = False ) :
         """ Computes the product matrix for a set of outputs
         This function wraps the parallel processing to compute the product matrix. It parametrizes the get_prodV_para_effi function depending on the parallel status.
@@ -302,7 +307,7 @@ class MANOCCA:
             if self.verbose>0 :
                 print("Parallel computation of prodV")
             nb_compute = cpu_count()
-            res = Parallel(n_jobs=n_jobs, verbose = self.verbose)(delayed(self.get_prodV_para_effi)(DD0, j, nb_compute, apply_qt = apply_qt) for j in range(nb_compute))
+            res = Parallel(n_jobs=n_jobs, verbose = self.verbose)(delayed(self.get_prodV_para_effi)(DD0, j, nb_compute, apply_qt = self.apply_qt) for j in range(nb_compute))
             # reordering
             res_ordered = []
             for _ in range(len(res[0])):
@@ -453,6 +458,10 @@ class MANOCCA:
         n_preds = var.shape[1]
         p = np.empty((n_preds,1))
 
+        if self.return_fisher:
+            fisher = np.empty((n_preds,1))
+
         for i_pred in range(n_preds):
             nan_mask = np.isnan(var[:,i_pred])
@@ -467,14 +476,19 @@ class MANOCCA:
 
             if ((self.covariates is not None) and (self.use_resid == False)) :
                 # print("With covariates")
-                p[i_pred] = custom_mancova(scale(self.prodV_red[~nan_mask,min_comp:n_comp]), scale(var[~nan_mask,i_pred].reshape(-1,1)), scale(self.covariates[~nan_mask,:])) # need to dissociate case with and without bias #[i_pred,:].reshape(-1,1))
+                if self.return_fisher :
+                    p[i_pred], fisher[i_pred] = custom_mancova(scale(self.prodV_red[~nan_mask,min_comp:n_comp]), scale(var[~nan_mask,i_pred].reshape(-1,1)), scale(self.covariates[~nan_mask,:]), return_fisher=True)
+                else :
+                    p[i_pred] = custom_mancova(scale(self.prodV_red[~nan_mask,min_comp:n_comp]), scale(var[~nan_mask,i_pred].reshape(-1,1)), scale(self.covariates[~nan_mask,:]))
             else :
                 # print("Without covariates")
-                if not isinstance(self.bias, type(None)):
-                    p[i_pred] = custom_manova(self.prodV_red[~nan_mask,min_comp:n_comp], var[~nan_mask,i_pred].reshape(-1,1)) #[i_pred,:].reshape(-1,1))
+                if self.return_fisher :
+                    p[i_pred], fisher[i_pred] = custom_manova(self.prodV_red[~nan_mask,min_comp:n_comp], var[~nan_mask,i_pred].reshape(-1,1), return_fisher=True)
                 else :
                     p[i_pred] = custom_manova(self.prodV_red[~nan_mask,min_comp:n_comp], var[~nan_mask,i_pred].reshape(-1,1))
 
         self.p = p
+        if self.return_fisher:
+            self.fisher = fisher
diff --git a/python/src/manova.py b/python/src/manova.py
index 954b87b3033150293deba3b847dcb51f131c3e6b..ded465e3cb95d3a36747875e3a561a9c4b0f89bd 100644
--- a/python/src/manova.py
+++ b/python/src/manova.py
@@ -11,7 +11,7 @@ from tools.compute_manova import custom_manova
 
 class MANOVA :
     def __init__(self, predictors, outputs, covariates=None, cols_outputs = None,
-                 cols_predictors = None, cols_covariates = None, use_resid = True):
+                 cols_predictors = None, cols_covariates = None, use_resid = True, return_fisher=False):
 
         ### Initializing
         self.outputs = outputs
         self.cols_outputs = cols_outputs
@@ -21,7 +21,6 @@ class MANOVA :
         self.predictors = predictors
         self.cols_predictors = cols_predictors
         self.predictors, self.cols_predictors = pt._extract_cols(self.predictors, self.cols_predictors)
-        self.predictors = scale(self.predictors)
 
@@ -38,7 +37,11 @@ class MANOVA :
         # print("adjusting predictor")
         # self.predictors = np.apply_along_axis(lambda x : pt.adjust_covariates(x,covariates), axis = 0, arr = self.predictors)
 
+        self.return_fisher = return_fisher
+
         # For results
+        self.p = None
+        self.fisher = None
@@ -50,15 +53,26 @@ class MANOVA :
         nb_tests = var.shape[1]
         p = np.empty((nb_tests,1))
 
+        if self.return_fisher:
+            fisher = np.empty((nb_tests,1))
+
         for i_pred in range(nb_tests):
             nan_mask = np.isnan(var[:,i_pred])
 
             if ((self.covariates is not None) and (self.use_resid == False)) :
                 # print("With covariates")
-                p[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1),
-                                          self.covariates[~nan_mask,:])
+                if self.return_fisher :
+                    p[i_pred], fisher[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1), self.covariates[~nan_mask,:], return_fisher=self.return_fisher)
+                else :
+                    p[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1), self.covariates[~nan_mask,:])
             else :
                 # print("Without covariates")
-                p[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1))
+                if self.return_fisher :
+                    p[i_pred], fisher[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1), return_fisher=self.return_fisher)
+                else :
+                    p[i_pred] = custom_manova(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1))
 
         self.p = p
+        if self.return_fisher:
+            self.fisher = fisher
diff --git a/python/src/tools/compute_manova.py b/python/src/tools/compute_manova.py
index 14dbc79ae18e6d3a9177f3a22ebebf2b84daa94c..d31810e9f521b281eb2a933a7a6e8850e61ff9ff 100644
--- a/python/src/tools/compute_manova.py
+++ b/python/src/tools/compute_manova.py
@@ -6,7 +6,7 @@ from scipy.stats import f
 # from sklearn.preprocessing import scale
 from .preprocessing_tools import scale
 
-def custom_manova(Y,X,C=None, return_beta = False, bias = None):
+def custom_manova(Y,X,C=None, return_beta = False, bias = None, return_fisher=False):
     ### /!\ data needs to be scaled, X needs to be (k,1)
     beta = linear_regression(Y, X, C)
     dot_Y = np.dot(Y.T,Y)
@@ -18,10 +18,12 @@
     fisher = (Y.shape[0]-Y.shape[1]-1)/(Y.shape[1])*(1-lamb)/lamb
 
-    if return_beta == False :
-        return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1)
-    else :
+    if return_beta :
         return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1), beta
+    elif return_fisher :
+        return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1), fisher
+    else :
+        return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1)
 
 
 def custom_mancova(Y,X,C=None):
@@ -39,16 +41,19 @@
 
-def linear_regression(Y, X, C=None):
+def linear_regression(Y, X, C=None, return_all = False):
     ones = np.ones((X.shape[0],1))
     if C is not None:
         X_C = np.hstack([ones,C,X])
         beta = np.dot(np.linalg.inv(np.dot(X_C.T,X_C)), np.dot(X_C.T, Y))
-        beta = beta[-1,:].reshape(-1,1) #-1
+        if not return_all:
+            beta = beta[-1,:].reshape(-1,1) #-1
+
     else :
         X = np.hstack([ones,X]) #,ones]) #[C,X]
         beta = np.dot(np.linalg.inv(np.dot(X.T,X)) , np.dot(X.T,Y))
-        beta = beta[-1,:].reshape(-1,1)
+        if not return_all:
+            beta = beta[-1,:].reshape(-1,1)
     return beta
diff --git a/python/src/univariate.py b/python/src/univariate.py
index 2cbbf2180c81517d688d5f81a6c9785b2bb48a8f..9063243ff1036f35a6ad85b15267f1ffdd1dc8cc 100644
--- a/python/src/univariate.py
+++ b/python/src/univariate.py
@@ -31,8 +31,6 @@ class UNIVARIATE :
         self.predictors, self.cols_predictors = pt._extract_cols(self.predictors, self.cols_predictors)
         self.predictors = scale(self.predictors)
 
-
-
         self.covariates = covariates
         self.cols_covariates = cols_covariates
         if covariates is not None :
@@ -52,14 +50,15 @@ class UNIVARIATE :
         self.p = []
         for i in range(self.predictors.shape[1]):
             pred = self.predictors[:,i].reshape(-1,1)
+            # print(pred.shape)
             nan_mask = np.isnan(pred).flatten()
 
             if ((self.covariates is not None) and (self.use_resid == False)) :
                 print(self.outputs.shape)
-                beta = linear_regression(self.outputs[~nan_mask], pred[~nan_mask], self.covariates[~nan_mask])
+                beta = linear_regression(self.outputs[~nan_mask,:], pred[~nan_mask,:], self.covariates[~nan_mask])
             else :
-                beta = linear_regression(self.outputs[~nan_mask], pred[~nan_mask])
+                beta = linear_regression(self.outputs[~nan_mask,:], pred[~nan_mask,:])
             self.beta = beta
-            res = ttest(beta, pred[~nan_mask], self.outputs[~nan_mask])
+            res = ttest(beta, pred[~nan_mask,:], self.outputs[~nan_mask,:])
             self.p += [res]
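
Usage note (not part of the diff): a minimal smoke test for the two low-level signatures changed in compute_manova.py. It assumes the snippet is run from python/src/ so that tools.compute_manova resolves the same way manova.py imports it; everything else is taken directly from the signatures in the hunks above.

# Minimal smoke test for the new return_fisher / return_all options.
# Assumptions: run from python/src/ (so the import below resolves) and
# inputs are pre-scaled, as the custom_manova docstring requires.
import numpy as np
from tools.compute_manova import custom_manova, linear_regression

rng = np.random.default_rng(0)
n = 500
Y = rng.standard_normal((n, 4))   # outputs
X = rng.standard_normal((n, 1))   # one predictor, shape (n, 1)
Y = (Y - Y.mean(axis=0)) / Y.std(axis=0)   # scale, per the /!\ note
X = (X - X.mean(axis=0)) / X.std(axis=0)

p = custom_manova(Y, X)                                # unchanged default: p-value only
p, fisher = custom_manova(Y, X, return_fisher=True)    # new: p-value plus Fisher statistic

beta_last = linear_regression(Y, X)                    # unchanged: last coefficient only, shape (4, 1)
beta_all = linear_regression(Y, X, return_all=True)    # new: full coefficient matrix, intercept row included, shape (2, 4)
print(p, fisher, beta_last.shape, beta_all.shape)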
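
And a sketch of the new constructor flags at the MANOCCA level. The keyword arguments and the .p / .fisher result attributes come from the diff; the test() call is an assumed entry point for illustration only, since the method that fills those attributes is not named in the hunks above.

# Sketch of the new MANOCCA constructor flags; test() is an assumption.
import numpy as np
from manocca import MANOCCA

rng = np.random.default_rng(1)
outputs = rng.standard_normal((1000, 6))
predictors = rng.standard_normal((1000, 2))

model = MANOCCA(predictors, outputs, n_comp=10,
                apply_qt=True,       # new: quantile-transform the product matrix (forced off when corr_bias=True)
                return_fisher=True,  # new: keep Fisher statistics alongside the p-values
                verbose=0)
model.test()  # assumed entry point that fills model.p / model.fisher

print(model.p.shape)       # (n_predictors, 1) p-values
print(model.fisher.shape)  # (n_predictors, 1) Fisher statistics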