diff --git a/raiss/imputation_launcher.py b/raiss/imputation_launcher.py index 80684ae7b48ea61e63cbf54adad234b42f402996..8f9c4932012774ba6d05f16b585197d5d3eedf2e 100644 --- a/raiss/imputation_launcher.py +++ b/raiss/imputation_launcher.py @@ -15,7 +15,7 @@ class ImputationLauncher(object): Class to perform imputation of snp from summary statistic """ def __init__(self, window_size=10000, buf=2500, - lamb= 0.01, pinv_rcond = 0.01, ld_type="plink"): + lamb= 0.01, pinv_rtol = 0.01, ld_type="plink"): """ Initialise the imputation object. Fix the windows size, the buffer size and the kind of imputation employed @@ -26,14 +26,14 @@ class ImputationLauncher(object): imputation (relevant only for batch imputation) lamb (float): size of the increment added to snp correlation matrices to make it less singular - pinv_rcond (float): the rcond scipy.linalg.pinv function argument. + pinv_rtol (float): the rtol scipy.linalg.pinv function argument. The scipy.linalg.pinv is used to invert the correlation matrices """ self.window_size = window_size self.buffer = buf self.lamb = lamb - self.rcond = pinv_rcond + self.rtol = pinv_rtol self.ld_type = ld_type def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder): @@ -63,7 +63,7 @@ class ImputationLauncher(object): def imputer(ld_file): return impg_like_imputation(ld_file, ref_panel, zscore, self.window_size, self.buffer, - self.lamb, self.rcond, self.ld_type) + self.lamb, self.rtol, self.ld_type) ld_file_list = set(map(get_file_name, glob.glob(pattern))) for ld_file in ld_file_list: diff --git a/raiss/pipes.py b/raiss/pipes.py index 8b7eec47952facd7b1c37bc39504e9696abe0be2..ec8995da9788a73b6d11272d1cce0286d7a5e55b 100644 --- a/raiss/pipes.py +++ b/raiss/pipes.py @@ -28,7 +28,7 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size, print("Imputation of {0} gwas for chromosome {1}".format(gwas, chrom)) # Imputer settings - imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rcond = float(eigen_threshold), ld_type = ld_type) + imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rtol = float(eigen_threshold), ld_type = ld_type) # Reading of inputs z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom) zscore = pd.read_csv(z_file, index_col=0, sep="\t") diff --git a/raiss/stat_models.py b/raiss/stat_models.py index cf4858402a58f78998487848adbe1f851525abb8..4be8842f0b30561be83880bc51e5f8068e3f197f 100644 --- a/raiss/stat_models.py +++ b/raiss/stat_models.py @@ -68,15 +68,15 @@ def var_in_boundaries(var,lamb): return var -def invert_sig_t(sig_t, lamb, rcond): +def invert_sig_t(sig_t, lamb, rtol): try: np.fill_diagonal(sig_t.values, (1+lamb)) - sig_t_inv = scipy.linalg.pinv(sig_t, rcond=rcond) + sig_t_inv = scipy.linalg.pinv(sig_t, rtol=rtol,atol=0) return(sig_t_inv) except np.linalg.LinAlgError: - invert_sig_t(sig_t, lamb*1.1, rcond*1.1) + invert_sig_t(sig_t, lamb*1.1, rtol*1.1) -def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): +def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rtol=0.01, batch=True): """ Compute the variance Args: @@ -85,10 +85,10 @@ def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): correlation sig_i_t (np.ndarray): correlation matrix of known matrix lamb (float): regularization term added to the diagonal of the sig_t matrix - rcond (float): threshold to filter eigenvector with a eigenvalue under rcond + rtol (float): threshold to filter eigenvector with a eigenvalue under rtol make inversion biased but much more numerically robust """ - sig_t_inv = invert_sig_t(sig_t, lamb, rcond) + sig_t_inv = invert_sig_t(sig_t, lamb, rtol) if sig_t_inv is None: return None else: diff --git a/raiss/windows.py b/raiss/windows.py index e583bb4feeac7a74926e7e9bfb2e377f4d6bbfa5..7965d467b20b8cd3875de2fc3b92a9016a02a9f4 100644 --- a/raiss/windows.py +++ b/raiss/windows.py @@ -133,7 +133,7 @@ def empty_imputed_dataframe(): def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, - rcond, ld_file_type = "plink"):# unknowns=pd.Series([])): + rtol, ld_file_type = "plink"):# unknowns=pd.Series([])): """ Each missing Snp is imputed by known snps found in a window Argument. @@ -175,7 +175,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, if(len(known) > 0): print("Imputation for window {0} - {1}".format(start_windows,end_windows )) - imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rcond=rcond, batch=True) + imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rtol=rtol, batch=True) try: batch_df = format_result_df(imp, ref_panel, known, unknowns)