Commit 3d3d5eef authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

Fixed centered window imputation + stat models

parent 63689893
...@@ -3,7 +3,7 @@ function for SNP imputation ...@@ -3,7 +3,7 @@ function for SNP imputation
""" """
import numpy as np import numpy as np
def ImpG_model_batch(Zt, Sig_t, Sig_i_t): def ImpG_model_batch(Zt, Sig_t, Sig_i_t, lamb=0.01):
""" """
Argument: Argument:
Zt : (vector) the vector of known Z scores Zt : (vector) the vector of known Z scores
...@@ -11,8 +11,9 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): ...@@ -11,8 +11,9 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t):
#np.fill_diagonal(Sig_t.values, 1.01) #np.fill_diagonal(Sig_t.values, 1.01)
#Sig_t.fillna(0, inplace=True) #Sig_t.fillna(0, inplace=True)
Sig_t = Sig_t.values
Sig_t_inv =np.linalg.inv(Sig_t) np.fill_diagonal(Sig_t, (1+lamb))
Sig_t_inv =np.linalg.pinv(Sig_t)
Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose())
...@@ -21,16 +22,21 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): ...@@ -21,16 +22,21 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t):
return({"Var":Var, "mu":mu}) return({"Var":Var, "mu":mu})
def ImpG_model_snp(Zt, Sig_t, Sig_i_t): def ImpG_model_snp(Zt, Sig_t, Sig_i_t, lamb=0.01):
""" """
Argument: Argument:
Zt : (vector) the vector of known Z scores Zt : (vector) the vector of known Z scores
""" """
#np.fill_diagonal(Sig_t.values, 1.01) #np.fill_diagonal(Sig_t.values, 1.01)
#Sig_t.fillna(0, inplace=True) #Sig_t.fillna(0, inplace=True)
Sig_t = Sig_t.values
Sig_t_inv =np.linalg.inv(Sig_t) np.fill_diagonal(Sig_t, (1+lamb))
#I = np.identity(Sig_t.shape[0])
#Sig_t_inv =np.linalg.inv(Sig_t)
Sig_t_inv = np.linalg.pinv(Sig_t)
Var = np.diag(Sig_t)[0] - np.dot(Sig_i_t, np.dot(Sig_t_inv, Sig_i_t.transpose())) Var = np.diag(Sig_t)[0] - np.dot(Sig_i_t, np.dot(Sig_t_inv, Sig_i_t.transpose()))
if Var< 0:
Var=0
#np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) #np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose())
mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt)) mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt))
......
...@@ -54,7 +54,7 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores): ...@@ -54,7 +54,7 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores):
def in_region(pos_vector, start, end): def in_region(pos_vector, start, end):
return ((start < pos_vector) & (pos_vector < end)) return ((start < pos_vector) & (pos_vector < end))
def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, unknowns=pd.Series([])): def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, unknowns=pd.Series([])):
""" """
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument Argument
...@@ -80,11 +80,11 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, ...@@ -80,11 +80,11 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size,
for snp_unknown in unknowns: for snp_unknown in unknowns:
# Boundary of the centered_window # Boundary of the centered_window
start_pos = ref_panel.loc[snp_unknown,'pos'] - window_size start_pos = max((ref_panel.loc[snp_unknown,'pos'] - window_size), start_ld_block)
end_pos = ref_panel.loc[snp_unknown,'pos'] + window_size end_pos = min(ref_panel.loc[snp_unknown,'pos'] + window_size, end_ld_block)
#print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block) #print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block)
in_LD_reg_n_window = in_region(Zscores.pos, int(start_ld_block), int(end_ld_block)) & in_region(Zscores.pos, start_pos, end_pos) in_LD_reg_n_window = in_region(Zscores.pos, start_pos, end_pos)
known = Zscores.loc[in_LD_reg_n_window].index known = Zscores.loc[in_LD_reg_n_window].index
Sig_t = LD_mat.loc[known, known] Sig_t = LD_mat.loc[known, known]
...@@ -93,14 +93,13 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, ...@@ -93,14 +93,13 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size,
if(len(known) > 0): if(len(known) > 0):
imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t) imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t)
Zscores.loc[snp_unknown, ["pos", "A0", "A1"]] = ref_panel.loc[snp_unknown, ['pos', "Ref_all", "alt_all"]] Zscores.loc[snp_unknown, "pos"] = ref_panel.loc[snp_unknown, 'pos']
Zscores.loc[snp_unknown, "A0"] = ref_panel.loc[snp_unknown, "Ref_all"]
Zscores.loc[snp_unknown, "A1"] = ref_panel.loc[snp_unknown, "alt_all"]
Zscores.loc[snp_unknown, "Z"] = imp['mu'] Zscores.loc[snp_unknown, "Z"] = imp['mu']
Zscores.loc[snp_unknown, "Var"] = imp['Var'] Zscores.loc[snp_unknown, "Var"] = imp['Var']
Zscores.loc[snp_unknown, 'Nsnp_to_impute'] = len(known) Zscores.loc[snp_unknown, 'Nsnp_to_impute'] = len(known)
i = i+1 i = i+1
if i%100 == 0: if i%100 == 0:
print("{0}\%".format(np.round(i/N_snp,4))) print("{0}\%".format(np.round(i/N_snp,4)))
return Zscores.sort_values(by="pos") return Zscores.sort_values(by="pos")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment