# Example_manocca_script.py
# Script to run the MANOCCA test
import numpy as np
import pandas as pd
from src.tools.preprocessing_tools import scale
from manocca import MANOCCA
# Example setup: simulate the outputs (Y) and a binary predictor (X),
# drawn independently of each other.
N_samples = 1000
K_predictors = 1
K_outputs = 10  # needs to be at least 2 to compute at least one covariance

# Y: standard-normal draws, one row per sample and one column per output.
outputs = np.random.normal(loc=0, scale=1, size=(N_samples, K_outputs))
# X: Bernoulli(0.4) draws, one row per sample and one column per predictor.
predictors = np.random.binomial(n=1, p=0.4, size=(N_samples, K_predictors))
# Compute the model : Y ~ X + C
# Full call for MANOCCA
# manocca = MANOCCA(predictors,             # pd.DataFrame or np.ndarray : the matrix or vector X in the model, the predictor of covariance.
#                   outputs,                # pd.DataFrame or np.ndarray : the matrix Y in the model, in other words the covariance to study.
#                   covariates = None,      # pd.DataFrame or np.ndarray : the C in the model, the covariates to adjust for.
#                   cols_outputs = None,    # specify column names of outputs, by default they are the column names of the pandas.DataFrame
#                   cols_predictors = None, # specify column names of predictors, by default they are the column names of the pandas.DataFrame
#                   cols_covariates = None, # specify column names of covariates, by default they are the column names of the pandas.DataFrame
#                   prodV_red = None,       # you can input a previously computed prodV_red (can be retrieved from manocca.prodV_red) to avoid unnecessary computation
#                   n_comp = None,          # number of principal components to use, by default it will be min(N_samples, K_outputs*(K_outputs - 1)/2)
#                   prod_to_keep = None,    # usually None, unless you want to restrict the analysis to a specific list of products, e.g. ['var1|var2','var1|var5',...]
#                   use_resid = True,       # use residuals to adjust for the covariates
#                   use_pca = False,        # use PCA to reduce the dimension of the product matrix
#                   n_jobs = 1              # number of jobs to use in parallel processing
#                   )
# Easy call: pass only the predictors (X) and the outputs (Y).
manocca = MANOCCA(predictors, outputs)

# Run the test, then report the p-value exposed on the instance as `manocca.p`.
manocca.test()
p_value_message = "P-value for a random MANOCCA test (expected to be non significant) : %f"
print(p_value_message % manocca.p)