Commit 3d3d5eef by Hanna JULIENNE

### Fixed centered window imputation + stat models

parent 63689893
 ... @@ -3,7 +3,7 @@ function for SNP imputation ... @@ -3,7 +3,7 @@ function for SNP imputation """ """ import numpy as np import numpy as np def ImpG_model_batch(Zt, Sig_t, Sig_i_t): def ImpG_model_batch(Zt, Sig_t, Sig_i_t, lamb=0.01): """ """ Argument: Argument: Zt : (vector) the vector of known Z scores Zt : (vector) the vector of known Z scores ... @@ -11,8 +11,9 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): ... @@ -11,8 +11,9 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): #np.fill_diagonal(Sig_t.values, 1.01) #np.fill_diagonal(Sig_t.values, 1.01) #Sig_t.fillna(0, inplace=True) #Sig_t.fillna(0, inplace=True) Sig_t = Sig_t.values Sig_t_inv =np.linalg.inv(Sig_t) np.fill_diagonal(Sig_t, (1+lamb)) Sig_t_inv =np.linalg.pinv(Sig_t) Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) ... @@ -21,16 +22,21 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): ... @@ -21,16 +22,21 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t): return({"Var":Var, "mu":mu}) return({"Var":Var, "mu":mu}) def ImpG_model_snp(Zt, Sig_t, Sig_i_t): def ImpG_model_snp(Zt, Sig_t, Sig_i_t, lamb=0.01): """ """ Argument: Argument: Zt : (vector) the vector of known Z scores Zt : (vector) the vector of known Z scores """ """ #np.fill_diagonal(Sig_t.values, 1.01) #np.fill_diagonal(Sig_t.values, 1.01) #Sig_t.fillna(0, inplace=True) #Sig_t.fillna(0, inplace=True) Sig_t = Sig_t.values Sig_t_inv =np.linalg.inv(Sig_t) np.fill_diagonal(Sig_t, (1+lamb)) #I = np.identity(Sig_t.shape[0]) #Sig_t_inv =np.linalg.inv(Sig_t) Sig_t_inv = np.linalg.pinv(Sig_t) Var = np.diag(Sig_t)[0] - np.dot(Sig_i_t, np.dot(Sig_t_inv, Sig_i_t.transpose())) Var = np.diag(Sig_t)[0] - np.dot(Sig_i_t, np.dot(Sig_t_inv, Sig_i_t.transpose())) if Var< 0: Var=0 #np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) #np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt)) mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt)) ... ...
 ... @@ -54,7 +54,7 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores): ... @@ -54,7 +54,7 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores): def in_region(pos_vector, start, end): def in_region(pos_vector, start, end): return ((start < pos_vector) & (pos_vector < end)) return ((start < pos_vector) & (pos_vector < end)) def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, unknowns=pd.Series([])): def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, unknowns=pd.Series([])): """ """ Each missing Snp is imputed by known snp found in a window centered on the SNP to impute Each missing Snp is imputed by known snp found in a window centered on the SNP to impute Argument Argument ... @@ -80,11 +80,11 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, ... @@ -80,11 +80,11 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, for snp_unknown in unknowns: for snp_unknown in unknowns: # Boundary of the centered_window # Boundary of the centered_window start_pos = ref_panel.loc[snp_unknown,'pos'] - window_size start_pos = max((ref_panel.loc[snp_unknown,'pos'] - window_size), start_ld_block) end_pos = ref_panel.loc[snp_unknown,'pos'] + window_size end_pos = min(ref_panel.loc[snp_unknown,'pos'] + window_size, end_ld_block) #print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block) #print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block) in_LD_reg_n_window = in_region(Zscores.pos, int(start_ld_block), int(end_ld_block)) & in_region(Zscores.pos, start_pos, end_pos) in_LD_reg_n_window = in_region(Zscores.pos, start_pos, end_pos) known = Zscores.loc[in_LD_reg_n_window].index known = Zscores.loc[in_LD_reg_n_window].index Sig_t = LD_mat.loc[known, known] Sig_t = LD_mat.loc[known, known] ... @@ -93,14 +93,13 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, ... @@ -93,14 +93,13 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, if(len(known) > 0): if(len(known) > 0): imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t) imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t) Zscores.loc[snp_unknown, ["pos", "A0", "A1"]] = ref_panel.loc[snp_unknown, ['pos', "Ref_all", "alt_all"]] Zscores.loc[snp_unknown, "pos"] = ref_panel.loc[snp_unknown, 'pos'] Zscores.loc[snp_unknown, "A0"] = ref_panel.loc[snp_unknown, "Ref_all"] Zscores.loc[snp_unknown, "A1"] = ref_panel.loc[snp_unknown, "alt_all"] Zscores.loc[snp_unknown, "Z"] = imp['mu'] Zscores.loc[snp_unknown, "Z"] = imp['mu'] Zscores.loc[snp_unknown, "Var"] = imp['Var'] Zscores.loc[snp_unknown, "Var"] = imp['Var'] Zscores.loc[snp_unknown, 'Nsnp_to_impute'] = len(known) Zscores.loc[snp_unknown, 'Nsnp_to_impute'] = len(known) i = i+1 i = i+1 if i%100 == 0: if i%100 == 0: print("{0}\%".format(np.round(i/N_snp,4))) print("{0}\%".format(np.round(i/N_snp,4))) return Zscores.sort_values(by="pos") return Zscores.sort_values(by="pos")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!