Commit defb0d7a authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

debugging aberant snp

parent e413b780
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
function for SNP imputation function for SNP imputation
""" """
import numpy as np import numpy as np
import scipy as sc
def compute_mu(sig_i_t, sig_t_inv, zt): def compute_mu(sig_i_t, sig_t_inv, zt):
return np.dot(sig_i_t, np.dot(sig_t_inv, zt)) return np.dot(sig_i_t, np.dot(sig_t_inv, zt))
...@@ -9,20 +10,41 @@ def compute_mu(sig_i_t, sig_t_inv, zt): ...@@ -9,20 +10,41 @@ def compute_mu(sig_i_t, sig_t_inv, zt):
def compute_var(sig_i_t, sig_t_inv, lamb, batch=True): def compute_var(sig_i_t, sig_t_inv, lamb, batch=True):
if batch: if batch:
var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose()) var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose())
ld_score = (sig_i_t**2).sum(1)
else: else:
var = (1 + lamb) - np.dot(sig_i_t, np.dot(sig_t_inv, sig_i_t.transpose())) var = (1 + lamb) - np.dot(sig_i_t, np.dot(sig_t_inv, sig_i_t.transpose()))
return var ld_score = (sig_i_t**2).sum()
return var, ld_score
def check_inversion(sig_t, sig_t_inv):
return np.allclose(sig_t, np.dot(sig_t, np.dot(sig_t_inv, sig_t)))
def impg_model(zt, sig_t, sig_i_t, lamb=0.01, batch=True): def impg_model(zt, sig_t, sig_i_t, lamb=0.01, batch=True):
""" """
Argument: Argument:
zt : (vector) the vector of known Z scores zt : (vector) the vector of known Z scores
""" """
snps = sig_t.columns
sig_t = sig_t.values sig_t = sig_t.values
np.fill_diagonal(sig_t, (1+lamb)) np.fill_diagonal(sig_t, (1+lamb))
sig_t_inv = sc.linalg.pinv(sig_t)
if batch:
condition_number = np.array([np.linalg.cond(sig_t)]*sig_i_t.shape[0])
correct_inversion = np.array([check_inversion(sig_t, sig_t_inv)]*sig_i_t.shape[0])
else:
condition_number = np.linalg.cond(sig_t)
correct_inversion = check_inversion(sig_t, sig_t_inv)
sig_t_inv = np.linalg.pinv(sig_t) var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch)
var = compute_var(sig_i_t, sig_t_inv, lamb, batch)
mu = compute_mu(sig_i_t, sig_t_inv, zt) mu = compute_mu(sig_i_t, sig_t_inv, zt)
if np.any(mu > 100):
print("ABERANT SNP SNiP ")
#mu = mu / (((1+lamb)-var)**0.5) #mu = mu / (((1+lamb)-var)**0.5)
return({"var":var, "mu":mu}) return({"var":var, "mu":mu, "ld_score": ld_score, "condition_number":condition_number, "correct_inversion":correct_inversion })
...@@ -106,10 +106,12 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow ...@@ -106,10 +106,12 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow
else: else:
Nwindows = 1 Nwindows = 1
window_resize = np.ceil(int(end_ld_block) - (int(start_ld_block))) window_resize = np.ceil(int(end_ld_block) - (int(start_ld_block)))
all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)] all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)]
#zscore = pd.read_csv(Zfile, index_col=0, sep="\t") #zscore = pd.read_csv(Zfile, index_col=0, sep="\t")
zscore = prepare_zscore_for_imputation(ref_panel, zscore) zscore = prepare_zscore_for_imputation(ref_panel, zscore)
zscore_results = zscore.copy(deep=True)
print("### Imputation of {0} snps ###".format(all_unknowns.shape[0])) print("### Imputation of {0} snps ###".format(all_unknowns.shape[0]))
for i in range(Nwindows): for i in range(Nwindows):
...@@ -136,19 +138,24 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow ...@@ -136,19 +138,24 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, unknow
'pos': ref_panel.loc[unknowns, 'pos'], 'pos': ref_panel.loc[unknowns, 'pos'],
'A0': ref_panel.loc[unknowns, "Ref_all"], 'A0': ref_panel.loc[unknowns, "Ref_all"],
"A1": ref_panel.loc[unknowns,"alt_all"], "A1": ref_panel.loc[unknowns,"alt_all"],
"ws_start":start_windows,
"ws_end":end_windows,
"Z" : imp['mu'], "Z" : imp['mu'],
"Var": imp["var"], "Var": imp["var"],
"ld_score" : imp["ld_score"],
"condition_number": imp['condition_number'],
"correct_inversion":imp["correct_inversion"],
"Nsnp_to_impute" : len(known) "Nsnp_to_impute" : len(known)
}) })
# keep only snp in the core window # keep only snp in the core window
start_windows = int(start_ld_block) + i*window_resize start_windows = int(start_ld_block) + i*window_resize
end_windows = int(start_ld_block) + (i+1)*window_resize end_windows = int(start_ld_block) + (i+1)*window_resize
in_core_window = in_region(batch_df.pos, start_windows, end_windows) in_core_window = in_region(batch_df.pos, start_windows, end_windows)
zscore = pd.concat([zscore, batch_df.loc[in_core_window]]) zscore_results = pd.concat([zscore_results, batch_df.loc[in_core_window]])
i = i+1 i = i+1
if i%10 == 0: if i%10 == 0:
print("{0}\%".format(np.round(i/Nwindows,4))) print("{0}\%".format(np.round(i/Nwindows,4)))
return zscore.sort_values(by="pos") return zscore_results.sort_values(by="pos")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment