""" function for SNP imputation """ import numpy as np import scipy as sc import scipy.linalg def compute_mu(sig_i_t, sig_t_inv, zt): return np.dot(sig_i_t, np.dot(sig_t_inv, zt)) def compute_var(sig_i_t, sig_t_inv, lamb, batch=True): if batch: var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose()) ld_score = (sig_i_t**2).sum(1) else: var = (1 + lamb) - np.dot(sig_i_t, np.dot(sig_t_inv, sig_i_t.transpose())) ld_score = (sig_i_t**2).sum() return var, ld_score def check_inversion(sig_t, sig_t_inv): return np.allclose(sig_t, np.dot(sig_t, np.dot(sig_t_inv, sig_t))) def var_in_boundaries(var,lamb): """ Forces the variance to be in the 0 to 1+lambda boundary theoritically we shouldn't have to do that """ id_neg = np.where(var < 0) var_norm = var var[id_neg] = 0 id_inf = np.where(var > (1+lamb)) var[id_inf] = 1 return var def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): """ Argument: zt : (vector) the vector of known Z scores """ sig_t = sig_t.values np.fill_diagonal(sig_t, (1+lamb)) sig_t_inv = scipy.linalg.pinv(sig_t, rcond=rcond) if batch: condition_number = np.array([np.linalg.cond(sig_t)]*sig_i_t.shape[0]) correct_inversion = np.array([check_inversion(sig_t, sig_t_inv)]*sig_i_t.shape[0]) else: condition_number = np.linalg.cond(sig_t) correct_inversion = check_inversion(sig_t, sig_t_inv) var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch) mu = compute_mu(sig_i_t, sig_t_inv, zt) if np.any(mu > 30): print("ABERANT SNP SNiP") var_norm = var_in_boundaries(var, lamb) R2 = (1-var_norm) mu = mu / np.sqrt(R2) return({"var" : var, "mu" : mu, "ld_score" : ld_score, "condition_number" : condition_number, "correct_inversion":correct_inversion })