### improved docstring in stat.py

parent 6e1b2fb0
 ... ... @@ -22,11 +22,29 @@ def compute_mu(sig_i_t, sig_t_inv, zt): """ Compute the estimation of z-score from neighborring snp Args: sig_i_t sig_i_t (matrix?) : correlation matrix with line corresponding to unknown Snp (snp to impute) and column to known SNPs sig_t_inv (np.ndarray): inverse of the correlation matrix of known matrix zt (np.array?): Zscores of known snp Returns: mu_i (np.array): a vector of length i containing the estimate of zscore """ return np.dot(sig_i_t, np.dot(sig_t_inv, zt)) def compute_var(sig_i_t, sig_t_inv, lamb, batch=True): """ Compute the expected variance of the imputed SNPs Args: sig_i_t (matrix?) : correlation matrix with line corresponding to unknown Snp (snp to impute) and column to known SNPs sig_t_inv (np.ndarray): inverse of the correlation matrix of known matrix lamb (float): regularization term added to matrix """ if batch: var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose()) ld_score = (sig_i_t**2).sum(1) ... ... @@ -55,8 +73,16 @@ def var_in_boundaries(var,lamb): def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): """ Compute the variance Args: zt : (vector) the vector of known Z scores zt (np.array): the vector of known Z scores sig_t (np.ndarray) : the matrix of known Linkage desiquilibrium correlation sig_i_t (np.ndarray): inverse of the correlation matrix of known matrix lamb (float): regularization term added to the diagonal of the sig_t matrix rcond (float): threshold to filter eigenvector with a eigenvalue under rcond make inversion biased but much more numerically robust """ sig_t = sig_t.values np.fill_diagonal(sig_t, (1+lamb)) ... ... @@ -68,10 +94,8 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): else: condition_number = np.linalg.cond(sig_t) correct_inversion = check_inversion(sig_t, sig_t_inv) var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch) mu = compute_mu(sig_i_t, sig_t_inv, zt) if np.any(mu > 30): print("ABERANT SNP SNiP") var_norm = var_in_boundaries(var, lamb) ... ...
 ... ... @@ -41,6 +41,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore): """ Prepare the known Z score by realigning them on the reference ref_panel the snps that are not present in the ref panel are filtered """ zscore = realigned_zfiles_on_panel(ref_panel, zscore) zscore['Var'] = -1 ... ... @@ -139,7 +140,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, in_core_window = in_region(batch_df.pos, start_core_window, end_core_window) # keep only SNP with non negligible explained variance snp_well_predicted = (batch_df.Var < 0.4) snp_well_predicted = (batch_df.Var < 0.9) batch_df_filt = batch_df.loc[in_core_window & snp_well_predicted, zscore_results.columns] zscore_results = pd.concat([zscore_results, batch_df_filt]) i = i+1 ... ...
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment