Commit 3ad69c2e authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

improved docstring in stat.py

parent 6e1b2fb0
......@@ -22,11 +22,29 @@ def compute_mu(sig_i_t, sig_t_inv, zt):
"""
Compute the estimate of the z-scores from neighboring SNPs
Args:
sig_i_t
sig_i_t (matrix?) : correlation matrix with rows corresponding to
unknown SNPs (SNPs to impute) and columns to known SNPs
sig_t_inv (np.ndarray): inverse of the correlation matrix of the known
SNPs
zt (np.array?): Zscores of known snp
Returns:
mu_i (np.array): a vector of length i containing the estimate of zscore
"""
return np.dot(sig_i_t, np.dot(sig_t_inv, zt))
def compute_var(sig_i_t, sig_t_inv, lamb, batch=True):
"""
Compute the expected variance of the imputed SNPs
Args:
sig_i_t (matrix?) : correlation matrix with rows corresponding to
unknown SNPs (SNPs to impute) and columns to known SNPs
sig_t_inv (np.ndarray): inverse of the correlation matrix of the known
SNPs
lamb (float): regularization term added to matrix
"""
if batch:
var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose())
ld_score = (sig_i_t**2).sum(1)
......@@ -55,8 +73,16 @@ def var_in_boundaries(var,lamb):
def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
"""
Compute the imputed Z-scores and their variance
Args:
zt : (vector) the vector of known Z scores
zt (np.array): the vector of known Z scores
sig_t (pd.DataFrame) : the linkage disequilibrium correlation matrix
of the known SNPs (converted to a numpy array with .values)
sig_i_t (np.ndarray): correlation matrix with rows corresponding to
unknown SNPs (SNPs to impute) and columns to known SNPs
lamb (float): regularization term added to the diagonal of the sig_t matrix
rcond (float): threshold used to filter out eigenvectors with an eigenvalue under rcond;
this makes the inversion biased but much more numerically robust
"""
sig_t = sig_t.values
np.fill_diagonal(sig_t, (1+lamb))
......@@ -68,10 +94,8 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
else:
condition_number = np.linalg.cond(sig_t)
correct_inversion = check_inversion(sig_t, sig_t_inv)
var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch)
mu = compute_mu(sig_i_t, sig_t_inv, zt)
if np.any(mu > 30):
print("ABERANT SNP SNiP")
var_norm = var_in_boundaries(var, lamb)
......
......@@ -41,6 +41,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
"""
Prepare the known Z-scores by realigning them on the reference panel.
The SNPs that are not present in the ref panel are filtered out.
"""
zscore = realigned_zfiles_on_panel(ref_panel, zscore)
zscore['Var'] = -1
......@@ -139,7 +140,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
in_core_window = in_region(batch_df.pos, start_core_window, end_core_window)
# keep only SNP with non negligible explained variance
snp_well_predicted = (batch_df.Var < 0.4)
snp_well_predicted = (batch_df.Var < 0.9)
batch_df_filt = batch_df.loc[in_core_window & snp_well_predicted, zscore_results.columns]
zscore_results = pd.concat([zscore_results, batch_df_filt])
i = i+1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment