Commit 15f2105f authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

change p-link LD matrix parameter so storage size is lower

parents 7b5d6a49 911021e1
import impute_jass.ld_matrix as LD import impute_jass.ld_matrix as LD
import impute_jass.stat_models as model
...@@ -18,15 +18,21 @@ def launch_plink_ld(startpos, endpos, chr, reffile, folder): ...@@ -18,15 +18,21 @@ def launch_plink_ld(startpos, endpos, chr, reffile, folder):
""" """
launch plink ld launch plink ld
""" """
bimref = reffile + ".bim"
ref_panel = pd.read_csv(bimref, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
ref_panel = ref_panel.loc[(ref_panel.pos > startpos) & (ref_panel.pos < endpos)]
ref_panel.index.to_series().to_csv("./snp_list.txt", index=False)
fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, startpos, endpos) fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, startpos, endpos)
cmd = "p-link --noweb --bfile {0} --r --ld-window-r2 0 --from-bp {1} --to-bp {2} --chr {3} --out {4}".format(reffile, startpos, endpos, chr, fo)
#print(cmd) cmd = "p-link --noweb --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --ld-window-r2 0.4 --chr {1} --out {2}".format(reffile, chr, fo)
print(cmd)
sub.check_output(cmd, shell=True) sub.check_output(cmd, shell=True)
def generate_sparse_matrix(plink_ld): def generate_sparse_matrix(plink_ld, ref_chr_df):
""" """
read plink results create a sparse dataframe LD-matrix read plink results create a sparse dataframe LD-matrix
then save it to a zipped pickle then save it to a zipped pickle
...@@ -38,10 +44,13 @@ def generate_sparse_matrix(plink_ld): ...@@ -38,10 +44,13 @@ def generate_sparse_matrix(plink_ld):
mat_ld = mat_ld.reindex(index=un_index, columns=un_index) mat_ld = mat_ld.reindex(index=un_index, columns=un_index)
mat_ld.fillna(0, inplace=True) mat_ld.fillna(0, inplace=True)
sym = mat_ld.values + mat_ld.values.transpose()
sym = np.maximum(mat_ld.values,mat_ld.values.transpose())
np.fill_diagonal(sym, 1.01) np.fill_diagonal(sym, 1.01)
mat_ld = pd.DataFrame(sym, index=mat_ld.index, columns=mat_ld.columns)
mat_ld = pd.DataFrame(sym, index=mat_ld.index, columns=mat_ld.columns)
re_index = ref_chr_df.loc[mat_ld.index].sort_values(by="pos").index
mat_ld = mat_ld.loc[re_index, re_index]
# mat_ld = pd.DataFrame(np.maximum(mat_ld.values, mat_ld.values.transpose()), index=un_index, columns=un_index) # mat_ld = pd.DataFrame(np.maximum(mat_ld.values, mat_ld.values.transpose()), index=un_index, columns=un_index)
mat_ld = mat_ld.to_sparse() mat_ld = mat_ld.to_sparse()
return mat_ld return mat_ld
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
function for SNP imputation function for SNP imputation
""" """
import numpy import numpy as np
def ImpG_model(Zt, Sig_t, Sig_i_t): def ImpG_model_batch(Zt, Sig_t, Sig_i_t):
""" """
Argument: Argument:
Zt : (vector) the vector of known Z scores Zt : (vector) the vector of known Z scores
...@@ -18,5 +18,24 @@ def ImpG_model(Zt, Sig_t, Sig_i_t): ...@@ -18,5 +18,24 @@ def ImpG_model(Zt, Sig_t, Sig_i_t):
Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose()) Var = np.diag(Sig_t)[0] - np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose())
mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zfile.loc[index_known, 'Z'])) mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt))
return((Var, mu)) return({"Var":Var, "mu":mu})
def ImpG_model_snp(Zt, Sig_t, Sig_i_t):
"""
Argument:
Zt : (vector) the vector of known Z scores
"""
np.fill_diagonal(Sig_t.values, 1.01)
Sig_t.fillna(0, inplace=True)
Sig_t_inv =np.linalg.inv(Sig_t)
Var = np.diag(Sig_t)[0] - np.dot(Sig_i_t, np.dot(Sig_t_inv, Sig_i_t.transpose()))
#np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose())
mu = np.dot(Sig_i_t, np.dot(Sig_t_inv, Zt))
return({"Var":Var, "mu":mu})
"""
implement the imputation window is sliding along the genome:
- ImpG like: Non overlapping windows, the imputation is apply in batch to unknown snp in the window
- Sliding: A sliding window centered on the Snp to impute
"""
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment