From defb0d7ab9313fadee29ba036542ced5466086db Mon Sep 17 00:00:00 2001 From: hanna julienne Date: Fri, 16 Mar 2018 18:37:06 +0100 Subject: [PATCH] debugging aberant snp --- impute_jass/impute_jass/stat_models.py | 30 ++++++++++++++++++++++---- impute_jass/impute_jass/windows.py | 13 ++++++++--- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/impute_jass/impute_jass/stat_models.py b/impute_jass/impute_jass/stat_models.py index e93b3ca..e4cc884 100644 --- a/impute_jass/impute_jass/stat_models.py +++ b/impute_jass/impute_jass/stat_models.py @@ -2,6 +2,7 @@ function for SNP imputation """ import numpy as np +import scipy as sc def compute_mu(sig_i_t, sig_t_inv, zt): return np.dot(sig_i_t, np.dot(sig_t_inv, zt)) @@ -9,20 +10,41 @@ def compute_mu(sig_i_t, sig_t_inv, zt): def compute_var(sig_i_t, sig_t_inv, lamb, batch=True): if batch: var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose()) + ld_score = (sig_i_t**2).sum(1) + else: var = (1 + lamb) - np.dot(sig_i_t, np.dot(sig_t_inv, sig_i_t.transpose())) - return var + ld_score = (sig_i_t**2).sum() + + return var, ld_score + +def check_inversion(sig_t, sig_t_inv): + return np.allclose(sig_t, np.dot(sig_t, np.dot(sig_t_inv, sig_t))) def impg_model(zt, sig_t, sig_i_t, lamb=0.01, batch=True): """ Argument: zt : (vector) the vector of known Z scores """ + + + snps = sig_t.columns sig_t = sig_t.values + np.fill_diagonal(sig_t, (1+lamb)) + sig_t_inv = sc.linalg.pinv(sig_t) + + if batch: + condition_number = np.array([np.linalg.cond(sig_t)]*sig_i_t.shape[0]) + correct_inversion = np.array([check_inversion(sig_t, sig_t_inv)]*sig_i_t.shape[0]) + else: + condition_number = np.linalg.cond(sig_t) + correct_inversion = check_inversion(sig_t, sig_t_inv) + - sig_t_inv = np.linalg.pinv(sig_t) - var = compute_var(sig_i_t, sig_t_inv, lamb, batch) + var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch) mu = compute_mu(sig_i_t, sig_t_inv, zt) + if np.any(mu > 100): + print("ABERANT SNP SNiP ") #mu = mu / (((1+lamb)-var)**0.5) - return({"var":var, "mu":mu}) + return({"var":var, "mu":mu, "ld_score": ld_score, "condition_number":condition_number, "correct_inversion":correct_inversion }) diff --git a/impute_jass/impute_jass/windows.py b/impute_jass/impute_jass/windows.py index 3c0161e..dcb6bd3 100644 --- a/impute_jass/impute_jass/windows.py +++ b/impute_jass/impute_jass/windows.py @@ -106,10 +106,12 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow else: Nwindows = 1 window_resize = np.ceil(int(end_ld_block) - (int(start_ld_block))) - + all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)] #zscore = pd.read_csv(Zfile, index_col=0, sep="\t") zscore = prepare_zscore_for_imputation(ref_panel, zscore) + zscore_results = zscore.copy(deep=True) + print("### Imputation of {0} snps ###".format(all_unknowns.shape[0])) for i in range(Nwindows): @@ -136,19 +138,24 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow 'pos': ref_panel.loc[unknowns, 'pos'], 'A0': ref_panel.loc[unknowns, "Ref_all"], "A1": ref_panel.loc[unknowns,"alt_all"], + "ws_start":start_windows, + "ws_end":end_windows, "Z" : imp['mu'], "Var": imp["var"], + "ld_score" : imp["ld_score"], + "condition_number": imp['condition_number'], + "correct_inversion":imp["correct_inversion"], "Nsnp_to_impute" : len(known) }) # keep only snp in the core window start_windows = int(start_ld_block) + i*window_resize end_windows = int(start_ld_block) + (i+1)*window_resize in_core_window = in_region(batch_df.pos, start_windows, end_windows) - zscore = pd.concat([zscore, batch_df.loc[in_core_window]]) + zscore_results = pd.concat([zscore_results, batch_df.loc[in_core_window]]) i = i+1 if i%10 == 0: print("{0}\%".format(np.round(i/Nwindows,4))) - return zscore.sort_values(by="pos") + return zscore_results.sort_values(by="pos") -- GitLab