diff --git a/python/src/alpha_diversity.py b/python/src/alpha_diversity.py
new file mode 100644
index 0000000000000000000000000000000000000000..f30ae056129268763a33f39f099cfdb617568645
--- /dev/null
+++ b/python/src/alpha_diversity.py
@@ -0,0 +1,100 @@
+import numpy as np
+import pandas as pd
+
+# from sklearn.preprocessing import scale
+from tools.preprocessing_tools import scale
+from tools.diversity_metrics import shannon_diversity, simpson_diversity
+from tools.compute_manova import linear_regression, ttest
+
+from tqdm import tqdm
+
+import tools.preprocessing_tools as pt
+
+class ALPHA_DIVERSITY :
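+    """ Alpha-diversity association test.
+
+    Collapses the outputs matrix into a single alpha-diversity score per sample
+    (Simpson or Shannon), scales it, and tests each predictor against that score
+    with a simple linear model (t-test on the regression coefficient).
+
+    Examples
+    --------
+    A minimal sketch ; the random data below are illustrative, not from a real analysis :
+
+    >>> import numpy as np
+    >>> Y = np.random.randint(0, 50, (100, 10)) # count-like outputs
+    >>> X = np.random.binomial(1, 0.4, (100, 1))
+    >>> ad = ALPHA_DIVERSITY(X, Y, metric = "Shannon")
+    >>> ad.test()
+    >>> ad.p # one p-value per predictor
+
+    """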
+    def __init__(self, predictors, outputs, covariates=None, cols_outputs = None,
+                 cols_predictors = None, cols_covariates = None, metric = "Simpson", use_resid = True):
+        ### Initializing 
+
+        self.metric = metric
+
+        self.outputs = outputs
+        self.cols_outputs = cols_outputs
+        self.outputs, self.cols_outputs = pt._extract_cols(self.outputs, self.cols_outputs)
+        self._apply_metric()
+        self.outputs = scale(self.outputs)
+
+
+        self.predictors = predictors
+        self.cols_predictors = cols_predictors
+        self.predictors, self.cols_predictors = pt._extract_cols(self.predictors, self.cols_predictors)
+        self.predictors = scale(self.predictors)
+
+
+
+        self.covariates = covariates
+        self.cols_covariates = cols_covariates
+        if covariates is not None :
+            self.covariates, self.cols_covariates = pt._extract_cols(self.covariates, self.cols_covariates) 
+            self.covariates = scale(self.covariates)
+
+        self.use_resid = use_resid
+        if use_resid and isinstance(self.covariates, np.ndarray):
+            print("computing residuals")
+            self.outputs = np.apply_along_axis(lambda x : pt.adjust_covariates(x, self.covariates), axis = 0, arr = self.outputs) # adjust on the processed (extracted, scaled) covariates
+
+
+    def _apply_metric(self):
+        # collapse the N x k outputs matrix into a single N x 1 diversity score
+        if self.metric == 'Simpson':
+            self.outputs = np.apply_along_axis(lambda x : simpson_diversity(x), axis = 1, arr = self.outputs).reshape(-1,1)
+        elif self.metric == 'Shannon':
+            self.outputs = np.apply_along_axis(lambda x : shannon_diversity(x), axis = 1, arr = self.outputs).reshape(-1,1)
+        else :
+            raise ValueError("Unknown metric : %s" % self.metric)
+
+    def test(self, var = None, **kwargs):
+        if isinstance(var, type(None)) :
+            var = self.predictors
+        else :
+            var = scale(var).reshape(-1,1)
+
+        nb_tests = var.shape[1]
+        p = np.empty((nb_tests,1))
+        for i_pred in range(nb_tests):
+            nan_mask = np.isnan(var[:,i_pred])
+            if ((self.covariates is not None) and (self.use_resid == False)) :
+                # print("With covariates")
+                beta = linear_regression(self.outputs[~nan_mask], var[~nan_mask,i_pred].reshape(-1,1), self.covariates[~nan_mask,:])
+                p[i_pred] = ttest(beta, var[~nan_mask, i_pred], self.outputs[~nan_mask])
+            else :
+                # print("Without covariates")
+                beta = linear_regression(self.outputs[~nan_mask,:], var[~nan_mask,i_pred].reshape(-1,1))
+                p[i_pred] = ttest(beta, var[~nan_mask, i_pred].reshape(-1,1), self.outputs[~nan_mask])
+        self.p = p
+
+
+
+"""
+        nan_mask = np.isnan(self.predictors).flatten()
+        if ((self.covariates is not None) and (self.use_resid == False)) :
+            print(self.outputs.shape)
+            beta = linear_regression(self.outputs[~nan_mask], self.predictors[~nan_mask], self.covariates[~nan_mask])
+        else :
+            beta = linear_regression(self.outputs[~nan_mask], self.predictors[~nan_mask])
+        self.beta = beta
+        res = ttest(beta, self.predictors[~nan_mask], self.outputs[~nan_mask])
+        self.p = res
+"""
+
+# ### TESTS ###
+
+# a = pd.DataFrame(np.random.randint(0,10,(10,5)), columns = ["cols_" + str(i) for i in range(5)])
+# b = pd.DataFrame(np.random.randint(0,10,(10,5)), columns = ["cols_pred_" + str(i) for i in range(5)])
+
+# ad = ALPHA_DIVERSITY(b, a, metric = "Simpson")
+# ad.test()
+# print(ad.p)
\ No newline at end of file
diff --git a/python/src/explainer.py b/python/src/explainer.py
index dcb9e6a9668ed0bd6f020ddea38493e166369af8..30cdd1f10eaf741543e10ee24ac602a7a140745f 100644
--- a/python/src/explainer.py
+++ b/python/src/explainer.py
@@ -6,19 +6,58 @@ from manova import MANOVA
 from univariate import UNIVARIATE
 
 from tools.h_clustering import cluster_corr
+from tools.build_graph import get_tree, build_graph
 
 import plotly.express as px
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
 # import matplotlib.pyplot as plt
 
+from tqdm import tqdm
+
 class Explainer :
+    """ Explainer for the MANOCCA model
+
+    To give more insight into the test, the Explainer class provides a set of methods to help understand how MANOCCA reached its result.
+
+    Parameters
+    ----------
+    model : MANOCCA class
+        An initialized MANOCCA instance. Can be retrieved from the Simulation class using simu.get_model(0), where MANOCCA is the first model in L_methods.
+
+    
+    Attributes
+    ----------
+    model : MANOCCA instance
+    covMat : numpy array, covariance matrix for all outputs.
+    df_loadings : pandas DataFrame, dataframe with the PCA loadings.
+
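+    Examples
+    --------
+    A minimal sketch, assuming an already-fitted MANOCCA instance m whose predictors
+    include a hypothetical column named 'age' :
+
+    >>> expl = Explainer(m)
+    >>> contrib = expl.feature_importances('age', n_comp = 10, threshold = 'top_5')
+    >>> contrib.head() # output pairs driving the association, largest contribution first
+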
+    """
+
     def __init__(self, model):
         self.model = model
 
         self.covMat = None
         self.cols_covMat = None
 
+        self.df_loadings = None
+
+
+    def _get_loadings(self):
+        # Retrieving PCA loadings
+        pca = self.model.pca
+        cols_outputs = np.array(self.model.cols_outputs.copy())
+
+        cols_prod = []
+        for i in range(len(cols_outputs)):
+            for j in range(i+1,len(cols_outputs)):
+                cols_prod += [str(cols_outputs[i]) + '|'+ str(cols_outputs[j])]
+        self.df_loadings = pd.DataFrame(np.abs(pca.components_), columns = cols_prod)
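+        # df_loadings has one row per PC ; columns are named 'out_i|out_j' for every pair i < j,
+        # matching the left-to-right product ordering used in MANOCCA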
+
+
     def power_pc_kept(self, pred, grid  = None, max_n_comp = None, plot = True):
         i_pred = self.model.cols_predictors.index(pred)
         var = self.model.predictors[:,i_pred].reshape(-1,1)
@@ -47,17 +86,14 @@ class Explainer :
         !!! pc_num starts at 0 !!! 
         """
         cols_outputs = np.array(self.model.cols_outputs.copy())
-        # Retrieving PCA loadings
-        pca = self.model.pca
+        
 
-        cols_prod = []
-        for i in range(len(cols_outputs)):
-            for j in range(i+1,len(cols_outputs)):
-                cols_prod += [str(cols_outputs[i]) + 'x'+ str(cols_outputs[j])]
-        df_loadings = pd.DataFrame(np.abs(pca.components_), columns = cols_prod)
+        if isinstance(self.df_loadings, type(None)):
+            self._get_loadings()
+
+        tmp = self.df_loadings.iloc[pc_num,:]
+        tmp = tmp.sort_values(ascending = False) # loadings are already taken in absolute value in _get_loadings()
 
-        tmp = df_loadings.iloc[pc_num,:]
-        tmp.sort_values(ascending = False, inplace = True)
         top_loadings = tmp.copy()
 
 
@@ -74,10 +110,12 @@ class Explainer :
 
 
 
-        mat = np.array([[np.nan]*covMat.shape[0]]*covMat.shape[1])
+        mat = np.full((covMat.shape[0], covMat.shape[1]), np.nan) # N x k matrix of NaNs
 
         for k in range(max_loadings):
-            el_ = top_loadings.index[k].split("x")
+            el_ = top_loadings.index[k].split("|")
 
             cols_covMat = cols_covMat.astype(str)
             i = np.where(cols_covMat == str(el_[0]))[0][0]
@@ -87,16 +125,21 @@ class Explainer :
             mat[j,i] = covMat[j,i]
 
 
+        mat = pd.DataFrame(mat, columns = cols_covMat, index = cols_covMat)
+        covMat = pd.DataFrame(covMat, columns = cols_covMat, index = cols_covMat)
+
         fig = make_subplots(rows=1, cols=2)
         fig.update_layout(
-            title = {
-                'text' : "Loadings of the %ith PC" %pc_num,
-                'y':0.9,
-                'x':0.5,    
-                'font' : dict(family = "Arial", size = 24),
-                'xanchor' : 'center',
-                'yanchor' : 'top'
-                },
             coloraxis=dict(colorscale='thermal', cmin = -1, cmax = 1)) #coloraxis_autocolorscale=False)
 
         fig.add_trace(
@@ -108,21 +151,87 @@ class Explainer :
             px.imshow(covMat, zmin = -1, zmax = 1).data[0],  #, color_continuous_scale = 'Darkmint'
             row=1, col=2
         )
+        # pin both heatmaps to the same zoom ; ranges follow the covariance matrix size
+        xmin, xmax = 0, covMat.shape[1]
+        ymin, ymax = covMat.shape[0], 0
+        fig.update_xaxes(range=[xmin, xmax], autorange=False)
+        fig.update_yaxes(range=[ymin, ymax], autorange=False)
         if plot :
             fig.show()
-        else :
-            return fig
+        
+        return mat
+
 
 
-    def univariate_cov(self, var, plot = True):
+
+    def univariate_cov(self, var, n_comp, plot = True, return_beta = False):
         m = self.model
 
         i_var = m.cols_predictors.index(var)
 
-        univ = UNIVARIATE(m.prodV_red, m.predictors[:,i_var].reshape(-1,1), cols_outputs = ['PC_' + str(i) for i in range(m.prodV_red.shape[1])], cols_predictors = ['var'])
+        univ = UNIVARIATE(m.prodV_red[:,:n_comp], m.predictors[:,i_var].reshape(-1,1), cols_outputs = ['PC_' + str(i) for i in range(n_comp)], cols_predictors = ['var'])
         univ.test()
 
         if plot:
             univ.plot()
 
-        return univ.p
\ No newline at end of file
+        if return_beta :
+            return univ.p, univ.beta
+        else :
+            return univ.p
+
+
+    def feature_importances(self, var, n_comp, threshold = 'all', return_raw_contrib = False) :
+        m = self.model
+        i_var = m.cols_predictors.index(var)
+        N_samples = np.sum(~np.isnan(m.predictors[:,i_var])) #retrieve sample (non-NaN values) size for the considered variable
+        if isinstance(self.df_loadings, type(None)):
+            self._get_loadings()
+
+        p, beta = self.univariate_cov(var, n_comp, plot = False, return_beta = True)
+        df_p_beta = pd.DataFrame(p, columns = ['p'])
+        df_p_beta['beta'] = beta
+        df_p_beta['chi2'] = N_samples*beta**2
+
+
+        if threshold == 'Bonferroni':
+            df_p_beta = df_p_beta[df_p_beta['p'] < 0.05/df_p_beta.shape[0]] # keep rows passing the Bonferroni-corrected threshold
+
+
+        elif 'top_' in threshold:
+            df_p_beta.sort_values('p', inplace = True)
+            nb_keep = int(threshold.split('_')[-1]) # retrieve number of pc to keep
+            df_p_beta = df_p_beta.iloc[:nb_keep,:]
+
+        elif threshold == 'all' :
+            pass
+
+        else :
+            raise ValueError("Threshold type not found : %s" % threshold)
+
+        pc_to_look = list(df_p_beta.index)
+
+        df_pc = self.df_loadings.loc[pc_to_look,:]
+
+        res = pd.merge(df_pc, df_p_beta, left_index = True, right_index = True)
+
+        if return_raw_contrib :
+            return res
+        else :
+            df_loadings = res.iloc[:,:-3] # drop the p, beta and chi2 columns
+            df_loadings = df_loadings*df_loadings # squared loadings : share of each product within the PC
+            df_prod_contrib = res["chi2"].values.reshape(-1,1)*df_loadings # weight each PC by its association strength
+            return df_prod_contrib.sum().sort_values(ascending = False)
+
+    def get_graph(self, L_d_otu_pvals, name = 'test.html', tree = None, figsize = (800,800), show = True, notebook = True):
+
+        # Plot graph
+        build_graph(L_d_otu_pvals, name = name, tree = tree, figsize = figsize, show = show, notebook = notebook)
+
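+    # split_contribution folds product-level contributions back onto single outputs :
+    # e.g. with index ['A|B', 'A|C', 'B|C'], output A receives contrib('A|B') + contrib('A|C')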
+    def split_contribution(self, prod_contrib, sep = "|"):
+        # sep is assumed to be '|' in the regex below
+        df_contrib_otu = pd.Series(index = self.model.cols_outputs, dtype = float)
+        for otu in tqdm(df_contrib_otu.index) :
+            # match products where otu is either the first ('otu|...') or the second ('...|otu') factor
+            df_contrib_otu[otu] = prod_contrib[prod_contrib.index.str.contains("^" + otu + r"\||\|" + otu + "$", regex = True)].sum()
+        return df_contrib_otu
+
diff --git a/python/src/manocca.py b/python/src/manocca.py
index d4b814126a780307ca66c7ffb86f929749439255..a48624dd68e62c5eec613e24904ce61f126337b6 100644
--- a/python/src/manocca.py
+++ b/python/src/manocca.py
@@ -11,7 +11,8 @@ from joblib import Parallel, delayed, cpu_count
 
 from tools.preprocessing_tools import scale
 from tools import preprocessing_tools as pt
-from tools.compute_manova import custom_manova
+from tools.compute_manova import custom_manova, custom_mancova
+
 
 import warnings
 warnings.filterwarnings('ignore')
@@ -19,12 +20,92 @@ warnings.filterwarnings('ignore')
 
 class MANOCCA:
     def __init__(self, predictors, outputs, covariates=None, cols_outputs = None,
-                 cols_predictors = None, cols_covariates = None, prodV_red=None, n_comp = None, use_resid = True, n_jobs = 1):
+                 cols_predictors = None, cols_covariates = None, prodV_red=None, n_comp = None, prod_to_keep = None, use_resid = True, n_jobs = 1):
+
+    """ Class performing a covariance test between a set of outputs, predictors and covariates.
+
+    For a sample of N, given a set of k outputs Y , n_p predictors X, and n_c covariates C the class performs the test : Y ~ X + C
+
+
+    Parameters
+    ----------
+    predictors : numpy array or pandas dataframe
+        Matrix of N x n_p predictors for the test, X in the formula. Each column should be a distinct predictor, and the tests are performed independently for each predictors.
+
+    outputs : numpy array or pandas dataframe
+        Matrix of N x k outputs from which to test the covariance, Y in the formula. Rows should be the samples and columns should be the taxa/outcome for the sample. The class will then compute the covariance and all the steps to prepare the test.
+
+    covariates : numpy array or pandas dataframe, default = None
+        Matrix of N x n_c covariates to include in the linear model, C in the formula. The final test is performed on the residuals derived using the covariates inputed here.
+
+    cols_ouputs : list, default = None
+        List of names for the outputs. If a dataframe is given previously, the column names are extracted. Otherwise if None and outputs is a np.array, the columns are set by default to integers from 1 to outputs.shape[1]
+
+    cols_predictors : list, default = None
+        List of names for the predictors. If a dataframe is given previously, the column names are extracted. Otherwise if None and predictors is a np.array, the columns are set by default to integers from 1 to predictors.shape[1]
+
+    cols_covariates : list, default = None
+        List of names for the covariates. If a dataframe is given previously, the column names are extracted. Otherwise if None and covariates is a np.array, the columns are set by default to integers from 1 to covariates.shape[1]
+
+    prodV_red : np.array, default = None
+        Pre-computed prodV_red from a previous run. To save some time, and since the covariance computation ahead of the test is time consuming, you can directly input the already computed prodV_red from a previous run. 
+        You can retrieve the prodV_red from a previous run by doing : model.prodV_red, model being a previous computed (at least initialized) MANOCCA model.
+
+    n_comp : int, default = None
+    Number of principal components to keep in the model. It can later still be reduced in the .test() method, but it can never be increased above the value inputed here. If None, it will be by default set to min(N_samples, N_products)
+
+    prod_to_keep : list, default = None
+    List of products to keep in the test. This feature is still under development (23/05/2023) and needs to be manipulated with care. Products to keep should be listed as strings under the format : 'var1|var2', var1 and var2 being two output variables, and var1 being 
+    a column indexed smaller than var2. In other words, if the column names for outputs are : [var1, .... , var2, ... ] it works, but not : [..., var2, ... , var1]
+
+    use_resid : bool, default = True
+    Whether or not to use residuals to adjust the covariates. If True the predictors are tested with regards to the PCs adjusted for the covariates, if False the covariates are included in the linear model PC_i ~ X+C.  !!! To this day (23/05/2023) only use_resid=True works !!! 
+    Not using the residuals currently displays miscalibration with an inflation of type 1 error rate.
+
+    n_jobs : int, default = 1
+    Number of jobs to use in parrallel. If set to -1, the max number of available cores is used. 
+
+
+    Attributes
+    ----------
+
+    All initialized parameters are set as attributes :
+    predictors : numpy array or pandas dataframe
+    outputs : numpy array or pandas dataframe
+    covariates : numpy array or pandas dataframe
+    cols_ouputs : list
+    cols_predictors : list
+    cols_covariates : list
+    prodV_red : np.array
+    n_comp : int
+    prod_to_keep : list
+    use_resid : bool
+    n_jobs : int
+
+    # Additional attributes #
+    prodV : numpy array, matrix of products
+    prodV_red : numpy array, matrix of principal components kept
+    pca : sklearn.decomposition.PCA, pca used to reduce the dimension
+    p : numpy array, array of p_values for the test
+
+
+    Examples :
+    >>> import numpy as np
+    >>> Y = np.random.normal(0,1,(100,10))
+    >>> X = np.random.binomial(1,0.4,(100,1))
+    >>> C = np.random.normal(0,1,(100,2))
+    >>> m = MANOCCA(X, Y, C, n_comp = 20, n_jobs = -1)
+    >>> m.test(n_comp = 10)
+    >>> m.p
+    array([[0.25268347]]) #depends of random state
+
+
+    """
         
         ### Initializing 
         self.outputs = outputs
         self.cols_outputs = cols_outputs
-        if outputs is not None :
+        if not isinstance(outputs, type(None)) :
             self.outputs, self.cols_outputs = pt._extract_cols(self.outputs, self.cols_outputs)
 
         self.predictors = predictors
@@ -50,72 +131,150 @@ class MANOCCA:
         self.pca = None
         self.p = None
 
+        # Filtering product columns
+        self.prod_to_keep = prod_to_keep
+        if not isinstance(self.prod_to_keep, type(None)):
+            self.prod_to_keep = np.array(self.prod_to_keep)
+            if not isinstance(self.prod_to_keep[0], (list, np.ndarray)) : # wrap a flat list of products into a list of lists
+                self.prod_to_keep = [self.prod_to_keep]
+
         # Compute prodV_red if not given
         if prodV_red is not None:
             self.prodV_red = prodV_red
 
         else : # If not we preprocess the data and compute prodV and prodV_red
-            self.prodV = self.get_prodV_para_wrap(scale(self.outputs), n_jobs = self.n_jobs)
-            self.prodV = scale(self.prodV)
-            self.prodV_red, self.pca = self.get_prodV_red(self.prodV, self.n_comp, return_pca = True) # prodV_red is scaled within the get_prodV_red function
+            self.prodV = self.get_prodV_para_wrap(self.outputs)
+            if not isinstance(self.prod_to_keep, type(None)): # Filtering out some columns
+                self.filter_prodV_columns()
+            self.prodV_red, self.pca = self.get_prodV_red(self.n_comp, return_pca = True)
+            self.prodV_red = scale(pt.get_qt(self.prodV_red)) # quantile-transform the kept PCs, then scale them
 
         if use_resid and isinstance(covariates, np.ndarray):
             print("computing residuals")
             self.prodV_red = np.apply_along_axis(lambda x : pt.adjust_covariates(x,covariates), axis = 0, arr = self.prodV_red)
         
     ### Functions to generate prodV and prodV_red ###
-    def get_prodV_para(self, DD0, job_id, nb_iter):
-        prodV_temp = np.empty((DD0.shape[0],0))
-        for i in range(job_id*nb_iter, min((job_id+1)*nb_iter,DD0.shape[1]-1)):
-            tmp = np.transpose(np.transpose(DD0[:,(i+1):DD0.shape[1]])*DD0[:,i])
-            tmp = pt.get_qt(tmp)
-            prodV_temp = np.hstack([prodV_temp, tmp])
-        return prodV_temp
-
 
     def get_prodV_para_effi(self, DD0, job_id, nb_compute):
+        """  Computes the product matrix for a set of outputs and a given number of jobs
+
+
+        Parameters
+        ----------
+        DD0 : numpy array
+            Outputs from which the covariance (product matrix) should be computed. It should be self.outputs
+
+        job_id : int
+            Job identification, equal to the job's offset in the loop.
+
+        nb_compute : int
+            Total number of workers allocated for the task. Equal to n_jobs in all cases, except when n_jobs=-1 where its value is cpu_count() from the joblib library.
+
+        Returns
+        -------
+        L_prodV : list of numpy arrays, the chunks of the product matrix computed by this job.
+        """
+
         L_prodV = []
         for i in range(job_id, DD0.shape[1]-1, nb_compute):
-            tmp = np.transpose(np.transpose(DD0[:,(i+1):DD0.shape[1]])*DD0[:,i])
+            tmp = (DD0[:,(i+1):]*DD0[:,i].reshape(-1,1)) # products of output i with every later output
             tmp = pt.get_qt(tmp)
             L_prodV += [tmp]
         return L_prodV
 
-### DEPRECIATED
-    # def get_prodV_para_wrap(self, DD0, n_jobs =-1):
-    #     if n_jobs == 1 :
-    #         print("Sequential computation of prodV")
-    #         return self.get_prodV_para(DD0, 0, DD0.shape[1])
-    #     else :
-    #         print("Parallel computation of prodV")
-    #         nb_compute = cpu_count()
-    #         nb_iter = int(DD0.shape[1]/nb_compute) + int(DD0.shape[1]%nb_compute>0)
-    #         res = Parallel(n_jobs=n_jobs, verbose = 10)(delayed(self.get_prodV_para)(DD0,j, nb_iter) for j in range(nb_compute))
-    #         return np.hstack(res)
-
-    def get_prodV_para_wrap(self, DD0, n_jobs =-1):
+
+    def get_prodV_para_wrap(self, DD0):
+        """  Computes the product matrix for a set of outputs
+        
+        This function wraps the parallel processing used to compute the product matrix. It parameterizes the get_prodV_para_effi calls depending on the parallel setting.
+        After the parallel computation, the chunks are re-ordered so that the products are all computed from left to right. I.e., with the output column names
+        [v1, v2, v3, v4], we want : [v1|v2, v1|v3, v1|v4, v2|v3, v2|v4, v3|v4] and no other order. This helps keep track of the products.
+
+        Parameters
+        ----------
+        DD0 : numpy array
+            Outputs from which the covariance (product matrix) should be computed. It should be self.outputs
+
+
+        Returns
+        -------
+        prodV : numpy array, the matrix of N_samples x k*(k-1)/2, with k the number of outputs, to use in the MANOCCA test
+
+        """
+        n_jobs = self.n_jobs
         if n_jobs == 1 :
             print("Sequential computation of prodV")
-            return self.get_prodV_para(DD0, 0, DD0.shape[1])
+            res = [self.get_prodV_para_effi(DD0, 0, 1)]
         else :
             print("Parallel computation of prodV")
             nb_compute = cpu_count()
             res = Parallel(n_jobs=n_jobs, verbose = 10)(delayed(self.get_prodV_para_effi)(DD0,j, nb_compute) for j in range(nb_compute))
-            # reordering
-            res_ordered = []
-            for _ in range(len(res[0])):
-                for L in res :
-                    if L != [] : 
-                        res_ordered += [L.pop(0)]
-            return np.hstack(res_ordered)
-
-    def get_prodV_red(self, prodV, n_comp, return_pca = False, svd_solver = 'full'):
+        # reordering : job j computed the chunks for i = j, j+nb_compute, j+2*nb_compute, ... ;
+        # popping round-robin across the jobs' lists restores the left-to-right order i = 0, 1, 2, ...
+        res_ordered = []
+        for _ in range(len(res[0])):
+            for L in res :
+                if L != [] : 
+                    res_ordered += [L.pop(0)]
+        return np.hstack(res_ordered)
+
+    def get_prodV_red(self, n_comp, return_pca = False, svd_solver = 'full'):
+        """  Computes the PCA reduced matrix given the product matrix
+            
+        The product matrix should already be computed in self.prodV.
+
+        Parameters
+        ----------
+        n_comp : int
+            Number of Principal Components (PC) to use for the test.
+
+        return_pca : bool, default = False
+            Flag to return the pca if needed. If yes, (prodV_red, pca) is returned, otherwise just prodV_red
+
+        svd_solver : string, default='full'
+            Solver to compute the PCA. See sklearn.decomposition.PCA documentation for more details.
+
+
+        Returns
+        -------
+        prodV_red : numpy array, the matrix of N_samples x n_comp to use in the MANOCCA test
+
+        """
         pca = PCA(n_components = n_comp, svd_solver = svd_solver)
-        tmp = pca.fit_transform(prodV) 
+        tmp = pca.fit_transform(self.prodV) 
         if return_pca:
-            return scale(tmp), pca 
+            return tmp, pca # scaling is now applied by the caller, after the quantile transform
         else :
-            return scale(tmp)
+            return tmp
+
+    def get_prod_cols(self, cols, sep = '|'):
+        return [cols[i]+sep+cols[j] for i in range(len(cols)-1) for j in range(i+1,len(cols))]
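+        # e.g. get_prod_cols(['v1','v2','v3']) -> ['v1|v2', 'v1|v3', 'v2|v3']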
+
+    def get_prod_to_keep(self):
+        L_to_keep = [self.get_prod_cols(el) for el in self.prod_to_keep]
+        L_to_keep = [item for sublist in L_to_keep for item in sublist] #flatten list
+        return L_to_keep
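+        # e.g. with self.prod_to_keep = [['v1','v2'], ['v3','v4']], L_to_keep is ['v1|v2', 'v3|v4']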
+
+
+    def filter_prodV_columns(self):
+        cols_prod = self.get_prod_cols(self.cols_outputs)
+
+        if self.prod_to_keep[0][0] in self.cols_outputs: #if first element in columns then we need to compute the products
+            L_to_keep = self.get_prod_to_keep()
+        else: # else products are already given
+            L_to_keep = self.prod_to_keep[0]
+
+        L_index = [cols_prod.index(x) for x in L_to_keep]
+        
+        self.prodV = self.prodV[:,L_index]
+
 
 
     def test(self, var = None, n_comp = None, min_comp = 0):
@@ -127,10 +286,11 @@ class MANOCCA:
         n_preds = var.shape[1]
         p = np.empty((n_preds,1))
         for i_pred in range(n_preds):
             nan_mask = np.isnan(var[:,i_pred])
             if ((self.covariates is not None) and (self.use_resid == False)) :
                 # print("With covariates")
-                p[i_pred] = custom_manova(self.prodV_red[~nan_mask,min_comp:n_comp], scale(var[~nan_mask,i_pred].reshape(-1,1)), scale(self.covariates[~nan_mask,:]))
+                p[i_pred] = custom_mancova(scale(self.prodV_red[~nan_mask,min_comp:n_comp]), scale(var[~nan_mask,i_pred].reshape(-1,1)), scale(self.covariates[~nan_mask,:]))
             else :
                 # print("Without covariates")
                 p[i_pred] = custom_manova(self.prodV_red[~nan_mask,min_comp:n_comp], scale(var[~nan_mask,i_pred].reshape(-1,1)))
diff --git a/python/src/simulation.py b/python/src/simulation.py
index 623ff0f767a48ff1808354a0de3ee1763ad64133..039a97163ebcc4b0de214ac04fa7654822d9bf74 100644
--- a/python/src/simulation.py
+++ b/python/src/simulation.py
@@ -3,6 +3,7 @@ import pandas as pd
 
 from manocca import MANOCCA
 from manova import MANOVA
+from alpha_diversity import ALPHA_DIVERSITY
 
 
 from tools.preprocessing_tools import scale
@@ -18,12 +19,13 @@ import tools.preprocessing_tools as pt
 
 class Simu :
     def __init__(self,  methods, predictors, outputs=None, covariates=None, cols_outputs = None,
-                 cols_predictors = None, cols_covariates = None, L_preproc = [], prodV_red=None, use_resid = True, n_comp = None, n_jobs = 1):
+                 cols_predictors = None, cols_covariates = None, L_preproc = [], prodV_red=None, diversity_metric = None, prod_to_keep = None, use_resid = True, n_comp = None, n_jobs = 1):
 
             ### Initializing 
+        self.raw_outputs = outputs # kept unprocessed : ALPHA_DIVERSITY computes its metric on the raw outputs
         self.outputs = outputs
         self.cols_outputs = cols_outputs
-        if covariates is not None :
+        if not isinstance(outputs, type(None)) :
             self.outputs, self.cols_outputs = pt._extract_cols(self.outputs, self.cols_outputs)
 
         self.predictors = predictors
@@ -36,13 +38,16 @@ class Simu :
         if covariates is not None :
             self.covariates, self.cols_covariates = pt._extract_cols(self.covariates, self.cols_covariates) 
 
-        self.prodV_red = prodV_red
+        # methods specific parameters
+        self.prodV_red = prodV_red # for MANOCCA
+        self.prod_to_keep = prod_to_keep
+        self.diversity_metric = diversity_metric # for alpha_diversity
 
         self.n_comp = n_comp
         self.use_resid = use_resid
         self.n_jobs = n_jobs
 
-        if len(L_preproc)>0:
+        if len(L_preproc)>0 and not isinstance(outputs, type(None)) :
             self.outputs = pt.pipeline(self.outputs, L_pipe = L_preproc)
 
         if isinstance(methods, str):
@@ -58,9 +63,14 @@ class Simu :
     def _build_model(self):
         for m in self.methods :
             if m == 'MANOCCA':
-                self.L_models += [MANOCCA(self.predictors, self.outputs, self.covariates, self.cols_outputs, self.cols_predictors, self.cols_covariates, self.prodV_red, self.n_comp, self.use_resid, self.n_jobs)]
+                self.L_models += [MANOCCA(predictors = self.predictors, outputs = self.outputs, covariates = self.covariates, cols_outputs = self.cols_outputs, cols_predictors = self.cols_predictors, cols_covariates = self.cols_covariates,
+                                          prodV_red = self.prodV_red, n_comp = self.n_comp, prod_to_keep = self.prod_to_keep, use_resid = self.use_resid, n_jobs = self.n_jobs)]
             elif m == 'MANOVA':
-                self.L_models += [MANOVA(self.predictors, self.outputs, self.covariates, self.cols_outputs, self.cols_predictors, self.cols_covariates, self.use_resid)]
+                self.L_models += [MANOVA(predictors = self.predictors, outputs = self.outputs, covariates = self.covariates, cols_outputs = self.cols_outputs, cols_predictors = self.cols_predictors, cols_covariates = self.cols_covariates, use_resid = self.use_resid)]
+            elif m == 'ALPHA_DIVERSITY' :
+                self.L_models += [ALPHA_DIVERSITY(predictors = self.predictors, outputs = self.raw_outputs, covariates = self.covariates, cols_outputs = self.cols_outputs, cols_predictors = self.cols_predictors, cols_covariates = self.cols_covariates,
+                                         metric = self.diversity_metric, use_resid = self.use_resid)]
+
     
     def get_model(self, num = 0):
         return self.L_models[num]
diff --git a/python/src/tools/build_graph.py b/python/src/tools/build_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..d445cfc265d603f7efdc4d3f70177e480de03565
--- /dev/null
+++ b/python/src/tools/build_graph.py
@@ -0,0 +1,129 @@
+import pandas as pd
+import numpy as np
+
+import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+
+from tqdm import tqdm
+
+from pyvis.network import Network #other option gravis
+import plotly.graph_objects as go
+import networkx as nx
+
+
+def get_otu_tree(df_tree, otu, col='OTU'):
+    # retrieve the taxonomy record of one OTU and format it for hover display
+    d_otu = df_tree[df_tree[col]==otu].to_dict('records')
+    d_otu = d_otu[0]
+
+    to_display = ''
+    for key in d_otu.keys():
+        to_display = to_display + key + ' : ' + str(d_otu[key]) + '\n'
+    return to_display
+
+def get_tree(df_tree, col) :
+    d_tree = {}
+    for el in df_tree[col].unique():
+        name = "\n".join(df_tree[df_tree[col]==el].apply(lambda x : x.name + " : " + x.values).values[0])
+        d_tree[el] = name
+    return d_tree
+
+
+def build_graph(L_d_otu_pvals, name = 'test.html', tree = None, figsize = (800,800), show = True, notebook = True):
+    """
+    The function build_graph constructs a network based on the output contributions for a list of predictors. The network is built with the networkx package
+    and rendered using pyvis.
+    Input :
+        - L_d_otu_pvals : list of dictionaries - for each predictor, the top contributions to plot in the network.
+                i.e. for 2 predictors and 4 features F1, F2, F3, F4 : [{"F1|F2" : 0.3, "F1|F4" : 0.1}, {"F1|F3" : 0.8, "F2|F4" : 0.02}]
+        - name : str - file name used to save the network before display. Mandatory for displaying the network.
+        - tree : dictionary - keys are the node names (features) and values are the strings to display when hovering
+                i.e. for 2 features F1 and F2 : {"F1" : "Feature 1, experimental factor", "F2" : "Order : order2 \n genus : genus2"}
+        - figsize : tuple - Size of figure (height,width)
+        - show : bool - display network, if False returns the network
+        - notebook : bool - True when displaying in jupyter notebook, else set to False
+    Output :
+        - None if show is True, else returns net, G the constructed pyvis network and networkx graph
+
+    """
+
+    nodes = set()
+    edges = set()
+
+    cmap = plt.cm.rainbow
+    
+    n_pheno = len(L_d_otu_pvals)
+    
+    d_color_id = {}
+    
+    G=nx.Graph()
+    
+    for k, d_otu_pvals in enumerate(L_d_otu_pvals):
+        color_id = k * int(cmap.N/(n_pheno))
+        for otux in tqdm(d_otu_pvals):
+            otux_split = otux.split('|')
+            node_names = []
+            for otu in otux_split: # loop on the two features
+                tree_display = tree[otu]
+                node_name = otu #+ "\n" + tree_display.split(' : ')[-1]
+                node_names += [node_name]
+                if node_name not in nodes : # create node if not already created
+                    G.add_node(node_name, size = 10, title = tree_display, color = mcolors.rgb2hex(cmap(color_id)))
+                    d_color_id[node_name] = [color_id]
+                    nodes.add(node_name)
+                else : # node already exists : blend in this predictor's colour by averaging the colour ids
+                    if color_id not in d_color_id[node_name]:
+                        d_color_id[node_name] += [color_id]
+                        new_color_id = int(sum(d_color_id[node_name])/len(d_color_id[node_name]))
+                        nx.set_node_attributes(G,{node_name : mcolors.rgb2hex(cmap(new_color_id)) },'color')
+
+
+            # Create an edge between the two nodes, weighted by the product's contribution
+            if (node_names[0],node_names[1]) not in edges :
+                G.add_edge(node_names[0],node_names[1], value = d_otu_pvals[otux], title = d_otu_pvals[otux], color = 'grey')
+                edges.add((node_names[0],node_names[1]))
+    
+    # Change node size
+    d=dict(G.degree)
+    d.update((x,10*np.sqrt(y)) for x, y in d.items())
+    nx.set_node_attributes(G,d,'size')
+    
+    
+    # A legend could be added here by creating extra nodes named after the predictors,
+    # coloured with the same colormap indices as the graph nodes
+
+
+    ### DISPLAY ###
+    net = Network(height = figsize[0], width = figsize[1], notebook = notebook)
+    net.show_buttons(filter_=['physics'])
+    net.from_nx(G)
+    if show :
+        display(net.show(name)) # display is the IPython built-in, available when running inside a notebook
+    else :
+        return net, G
\ No newline at end of file
diff --git a/python/src/tools/compute_manova.py b/python/src/tools/compute_manova.py
index 4d8348985588106901071728a6fb69ee7c1f5a6c..34d1cbc471af1f1a5e1c08f4eb75dd53734ff781 100644
--- a/python/src/tools/compute_manova.py
+++ b/python/src/tools/compute_manova.py
@@ -6,7 +6,7 @@ from scipy.stats import f
 # from sklearn.preprocessing import scale
 from .preprocessing_tools import scale
 
-def custom_manova(Y,X,C=None):
+def custom_manova(Y,X,C=None, return_beta = False):
     ### /!\ data needs to be scaled, X needs to be (k,1)
     # if C is not None:
     #     X_C = np.hstack([X,C]) #[C,X]
@@ -18,6 +18,44 @@ def custom_manova(Y,X,C=None):
     beta = linear_regression(Y, X, C)
     dot_Y = np.dot(Y.T,Y)
 
+    (sign_num, num) = np.linalg.slogdet(dot_Y-X.shape[0]*np.dot(beta,beta.T))
+    (sign_denom, denom) = np.linalg.slogdet(dot_Y)
+    lamb = np.exp(sign_num*num-(sign_denom*denom))
+    # lamb is Wilks' lambda = det(Y'Y - n * beta beta') / det(Y'Y), computed via
+    # log-determinants for numerical stability
+
+    fisher = (Y.shape[0]-Y.shape[1]-1)/(Y.shape[1])*(1-lamb)/lamb
+
+    if not return_beta :
+        return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1)
+    else :
+        return f.sf(fisher, Y.shape[1], Y.shape[0]-Y.shape[1]-1), beta
+
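+# Usage sketch (shapes illustrative) : with Y the scaled outputs (N, k), X a scaled
+# predictor column (N, 1) and C scaled covariates (N, n_c) :
+#   p = custom_manova(Y, X)                                # Wilks-lambda based p-value
+#   p, beta = custom_manova(Y, X, C, return_beta = True)   # also returns the coefficients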
+
+def custom_mancova(Y,X,C=None):
+    ### /!\ data needs to be scaled, X needs to be (N,1)
+    # Y is first residualized on the intercept and covariates (fitted jointly with X),
+    # then the statistic is built from the coefficient of X on the residualized Y
+    Y, beta = linear_regression_test(Y, X, C)
+    dot_Y = np.dot(Y.T,Y)
+
     (sign_num, num) = np.linalg.slogdet(dot_Y-X.shape[0]*np.dot(beta,beta.T))
     (sign_denom, denom) = np.linalg.slogdet(dot_Y)
     lamb = np.exp(sign_num*num-(sign_denom*denom))
@@ -30,16 +68,32 @@ def custom_manova(Y,X,C=None):
 def linear_regression(Y, X, C=None):
     ones = np.ones((X.shape[0],1))
     if C is not None:
-        X_C = np.hstack([X,C,ones]) #[C,X]
+        X_C = np.hstack([ones,C,X]) # design matrix : intercept, covariates, predictor last
         # beta = np.dot(np.dot(Y.T,X_C),np.linalg.inv(np.dot(X_C.T,X_C)))
         beta = np.dot(np.linalg.inv(np.dot(X_C.T,X_C)), np.dot(X_C.T, Y))
-        beta = beta[0,:].reshape(-1,1) #-1
+        beta = beta[-1,:].reshape(-1,1) # keep the predictor's coefficient (last column of the design)
     else :
         beta = np.dot(Y.T,X)/X.shape[0]
-
     return beta
 
 
+def linear_regression_test(Y, X, C=None):
+    ones = np.ones((X.shape[0],1))
+    if C is not None:
+        X_C = np.hstack([ones,C,X]) # design matrix : intercept, covariates, predictor last
+        beta = np.dot(np.linalg.inv(np.dot(X_C.T,X_C)), np.dot(X_C.T, Y))
+
+        Y = Y - np.dot(X_C[:,:-1],beta[:-1,:]) # remove the fitted intercept + covariate part (the @ operator was slightly slower in a quick benchmark)
+
+        beta = beta[-1,:].reshape(-1,1) # keep the predictor's coefficient
+    else :
+        beta = np.dot(Y.T,X)/X.shape[0]
+
+    return Y, beta 
+
+
 
 from scipy.stats import t
 
diff --git a/python/src/tools/diversity_metrics.py b/python/src/tools/diversity_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a45f78cb3805ffdd709af9aa0b5cf8e8c123511
--- /dev/null
+++ b/python/src/tools/diversity_metrics.py
@@ -0,0 +1,14 @@
+import numpy as np
+import pandas as pd
+
+
+def shannon_diversity(d):
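+    """ Shannon-type index : sum(p * log(p)), with p = d / d.sum().
+
+    Note : this is the negative of the usual Shannon diversity H = -sum(p * log(p)) ;
+    downstream, ALPHA_DIVERSITY scales the scores, so the sign only flips the
+    regression coefficient and leaves the two-sided test unchanged.
+    """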
+    if isinstance(d,pd.DataFrame):
+        return (d/d.sum()*np.log(d/d.sum())).fillna(0).sum() # fillna for log of 0 values which produce NaNs
+    else :
+        return np.nan_to_num(d/d.sum()*np.log(d/d.sum())).sum()
+
+
+
+def simpson_diversity(d):
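+    """ Simpson diversity with the unbiased estimator : 1 - sum(n_i*(n_i-1)) / (N*(N-1)), with N = d.sum(). """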
+    return 1 - (d*(d-1)).sum()/(d.sum()*(d.sum()-1))
\ No newline at end of file
diff --git a/python/src/tools/preprocessing_tools.py b/python/src/tools/preprocessing_tools.py
index 0e078be754af56cc27902b8632da24088002a69c..d41bebba30b80126486f2ee7985dbaf4e3c313d7 100644
--- a/python/src/tools/preprocessing_tools.py
+++ b/python/src/tools/preprocessing_tools.py
@@ -24,6 +24,7 @@ def _extract_cols(data, cols):
     else :
         if isinstance(cols, type(None)) :
             cols = list(range(data.shape[1]))
+
     return data, cols
 
 
@@ -31,11 +32,15 @@ def _extract_cols(data, cols):
 def scale(a):
     return np.apply_along_axis(lambda x : (x-np.nanmean(x))/np.nanstd(x), axis = 0, arr = a)
 
+def center(a):
+    return np.apply_along_axis(lambda x : (x-np.nanmean(x)), axis = 0, arr = a)
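+# e.g. center(np.array([[1., 3.], [3., 5.]])) -> [[-1., -1.], [1., 1.]] : column means are removed, variances kept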
+
 
 
 
 def selecting_cols(df, threshold, verbose = 1):
     # Selecting columns with at least a percent of occurrences in cohort
+    # /!\ IDs must be in the first column
     cols_filter = np.append(np.array([True]),((df.iloc[:,1:]!=0).sum(axis = 0)>threshold*df.shape[0]).values)
 
     if verbose > 0 :
@@ -141,6 +146,9 @@ def wrap_preproc(data, preproc, cols = None):
     if preproc == 'log10':
         print("***** Computing log *****")
         return apply_log10(data)
+    if preproc == 'arcsin_root':
+        print("***** Computing arcsin_root *****")
+        return apply_arcsin_root(data)
     if preproc == 'scale':
         print("***** Computing scale *****")
         return apply_scale(data)
@@ -189,11 +197,12 @@ def apply_scale(data_, cols = None):
 
 def apply_center(data_, cols = None):
     data = data_.copy()
-    if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
-#         self.df_otu_data[self.cols_otu] = self.df_otu_data[self.cols_otu].apply(lambda x: np.log(x+0.000000001))
-        data = scale(data, with_std = False)
-    elif isinstance(data, np.ndarray):
-        data = scale(data, with_std = False)
+    data = center(data)
     return data
 
 
@@ -210,6 +219,16 @@ def apply_fisher(data_, cols = None):
         data = np.apply_along_axis(lambda x: 0.5*np.log((1+x)/(1-x)), axis = 1, arr = data)
     return data
         
+def apply_arcsin_root(data_, cols = None):
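+    # arcsine square-root transform : expects proportions in [0, 1], e.g. the output of get_proportion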
+    data = data_.copy()
+    if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
+#         self.df_otu_data[self.cols_otu] = self.df_otu_data[self.cols_otu].apply(lambda x: np.log(x+0.000000001))
+        data = data.apply(lambda x: np.arcsin(np.sqrt(x)))
+    elif isinstance(data, np.ndarray):
+        data = np.apply_along_axis(lambda x: np.arcsin(np.sqrt(x)), axis = 1, arr = data)
+    return data
+
+
 def get_qt(data_, cols = None):
     data = data_.copy()
     qt = QuantileTransformer(n_quantiles = data.shape[0], output_distribution = 'normal')
@@ -230,9 +249,9 @@ def get_pt(data_, cols = None):
 def get_proportion(data_, cols = None):
     data = data_.copy()
     if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
-        data = data.div(data.sum(axis = 1),axis = 0)
+        data = data.div(data.sum(axis = 1),axis = 0).replace(np.nan, 0)
     elif isinstance(data, np.ndarray):
-        data = np.divide(data, data.sum(axis=1).reshape(-1,1))
+        data = np.nan_to_num(np.divide(data, data.sum(axis=1).reshape(-1,1)))
     return data
 
 def positify(data_, cols = None):
diff --git a/python/src/univariate.py b/python/src/univariate.py
index db8058002dcc1431a2faafc17fb43b8811d108ca..84f9904e7b73b62aaedb5f95be379058c1fc106c 100644
--- a/python/src/univariate.py
+++ b/python/src/univariate.py
@@ -45,6 +45,7 @@ class UNIVARIATE :
             self.outputs = np.apply_along_axis(lambda x : pt.adjust_covariates(x,covariates), axis = 0, arr = self.outputs) 
 
         self.p = None
+        self.beta = None
 
     def test(self, **kwargs):
         nan_mask = np.isnan(self.predictors).flatten()
@@ -53,7 +54,7 @@ class UNIVARIATE :
             beta = linear_regression(self.outputs[~nan_mask], self.predictors[~nan_mask], self.covariates[~nan_mask])
         else :
             beta = linear_regression(self.outputs[~nan_mask], self.predictors[~nan_mask])
-
+        self.beta = beta
         res = ttest(beta, self.predictors[~nan_mask], self.outputs[~nan_mask])
         self.p = res