Skip to content
Snippets Groups Projects
Select Git revision
  • b7fb434f1fc724e77fa54ca5eb1c9687074b6a64
  • master default protected
2 results

Example_manocca_script.py

Blame
  • user avatar
    Christophe Boetto authored
    b7fb434f
    History
    Example_manocca_script.py 2.14 KiB
    # Script to run the MANOCCA test
    
    import numpy as np
    import pandas as pd
    
    from src.tools.preprocessing_tools import scale
    
    from manocca import MANOCCA
    
    
    # ---------------------------------------------------------------------
    # Simulated data for the example
    # ---------------------------------------------------------------------
    N_samples = 1000
    K_outputs = 10  # must be >= 2, otherwise there is no covariance to compute
    K_predictors = 1

    # Outputs are drawn first, then predictors, so the order of the random
    # draws is the same as it has always been in this example.
    outputs = np.random.normal(loc=0, scale=1, size=(N_samples, K_outputs))
    predictors = np.random.binomial(n=1, p=0.4, size=(N_samples, K_predictors))


    # ---------------------------------------------------------------------
    # Model being tested : Y ~ X + C
    # ---------------------------------------------------------------------
    # Reference : the complete MANOCCA call with every argument spelled out.
    #
    # manocca = MANOCCA(
    #     predictors,              # pd.DataFrame or np.ndarray : X in the model (matrix or vector), the predictor of covariance
    #     outputs,                 # pd.DataFrame or np.ndarray : Y in the model, i.e. the covariance to study
    #     covariates = None,       # pd.DataFrame or np.ndarray : C in the model, the covariates to adjust for
    #     cols_outputs = None,     # column names of the outputs; defaults to the pandas.DataFrame column names
    #     cols_predictors = None,  # column names of the predictors; defaults to the pandas.DataFrame column names
    #     cols_covariates = None,  # column names of the covariates; defaults to the pandas.DataFrame column names
    #     prodV_red = None,        # a previously computed prodV_red (available as manocca.prodV_red), to avoid recomputing it
    #     n_comp = None,           # number of principal components; defaults to min(N_samples, K_outputs*(K_outputs - 1)/2)
    #     prod_to_keep = None,     # usually None; set it to restrict the analysis to given products, ex : ['var1|var2','var1|var5',...]
    #     use_resid = True,        # use residuals to adjust for the covariates
    #     use_pca = False,         # use pca to reduce the dimension of the product matrix
    #     n_jobs = 1,              # number of jobs for parallel processing
    # )

    # Minimal call : only the predictors and the outputs are supplied.
    manocca = MANOCCA(predictors, outputs)

    # Run the test, then report the resulting p-value.
    manocca.test()

    print(
        "P-value for a random MANOCCA test (expected to be non significant) : %f"
        % manocca.p
    )