# Example_manocca_script.py

# Script to run the MANOCCA test

import numpy as np
import pandas as pd

from src.tools.preprocessing_tools import scale

from manocca import MANOCCA


# Simulated example data
N_samples = 1000
K_outputs = 10  # at least 2 outputs are required so that at least one covariance exists
K_predictors = 1

# Outputs Y: normal draws with mean 0 and standard deviation 1,
# arranged as an (N_samples, K_outputs) array.
outputs = np.random.normal(loc=0, scale=1, size=(N_samples, K_outputs))
# Predictors X: binomial draws with a single trial and success probability 0.4
# (so every entry is 0 or 1), arranged as an (N_samples, K_predictors) array.
predictors = np.random.binomial(n=1, p=0.4, size=(N_samples, K_predictors))


# Compute the model : Y ~ X + C
# Full call for MANOCCA
# manocca = MANOCCA(predictors, # pd.DataFrame or np.ndarray : the matrix or vector X in the model, the predictor of covariance.
#                   outputs, # pd.DataFrame or np.ndarray : the matrix Y in the model, in other words the covariance to study.
#                   covariates = None, # pd.DataFrame or np.ndarray : the C in the model, the covariates to adjust.
#                   cols_outputs = None, # specify column names of outputs, by default they are the column names of the pandas.DataFrame
#                   cols_predictors = None, # specify column names of predictors, by default they are the column names of the pandas.DataFrame
#                   cols_covariates = None, # specify column names of covariates, by default they are the column names of the pandas.DataFrame
#                   prodV_red = None, # you can input a previously computed prodV_red (can be retrieved from manocca.prodV_red) to avoid unnecessary computation
#                   n_comp = None, # number of principal components to use, by default it will be min(N_samples, K_outputs*(K_outputs - 1)/2)
#                   prod_to_keep = None, # usually None, unless you want to restrict the analysis to a specific list of products, e.g. ['var1|var2','var1|var5',...]
#                   use_resid = True, # use residuals to adjust the covariates
#                   use_pca = False, # use pca to reduce the dimension of the product matrix
#                   n_jobs = 1 # number of jobs to use in parallel processing
#                  )

# Minimal call: only the predictors and the outputs are passed explicitly.
manocca = MANOCCA(predictors, outputs)

# Run the test, then report the p-value exposed on the instance as `manocca.p`.
manocca.test()

print(
    "P-value for a random MANOCCA test (expected to be non significant) : %f"
    % (manocca.p,)
)