Skip to content
Snippets Groups Projects
Commit 80dae27d authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

change rcond to rtol : fixed lack of backward compatibility of scipy

parent 86c31e4f
No related branches found
No related tags found
1 merge request!11Dev
......@@ -15,7 +15,7 @@ class ImputationLauncher(object):
Class to perform imputation of snp from summary statistic
"""
def __init__(self, window_size=10000, buf=2500,
lamb= 0.01, pinv_rcond = 0.01, ld_type="plink"):
lamb= 0.01, pinv_rtol = 0.01, ld_type="plink"):
"""
Initialise the imputation object. Fix the windows size, the buffer size
and the kind of imputation employed
......@@ -26,14 +26,14 @@ class ImputationLauncher(object):
imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation
matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument.
pinv_rtol (float): the rtol scipy.linalg.pinv function argument.
The scipy.linalg.pinv is used to invert the correlation matrices
"""
self.window_size = window_size
self.buffer = buf
self.lamb = lamb
self.rcond = pinv_rcond
self.rtol = pinv_rtol
self.ld_type = ld_type
def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
......@@ -63,7 +63,7 @@ class ImputationLauncher(object):
def imputer(ld_file):
return impg_like_imputation(ld_file, ref_panel, zscore,
self.window_size, self.buffer,
self.lamb, self.rcond, self.ld_type)
self.lamb, self.rtol, self.ld_type)
ld_file_list = set(map(get_file_name, glob.glob(pattern)))
for ld_file in ld_file_list:
......
......@@ -28,7 +28,7 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size,
print("Imputation of {0} gwas for chromosome {1}".format(gwas, chrom))
# Imputer settings
imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rcond = float(eigen_threshold), ld_type = ld_type)
imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rtol = float(eigen_threshold), ld_type = ld_type)
# Reading of inputs
z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom)
zscore = pd.read_csv(z_file, index_col=0, sep="\t")
......
......@@ -68,15 +68,15 @@ def var_in_boundaries(var,lamb):
return var
def invert_sig_t(sig_t, lamb, rcond):
def invert_sig_t(sig_t, lamb, rtol):
try:
np.fill_diagonal(sig_t.values, (1+lamb))
sig_t_inv = scipy.linalg.pinv(sig_t, rcond=rcond)
sig_t_inv = scipy.linalg.pinv(sig_t, rtol=rtol,atol=0)
return(sig_t_inv)
except np.linalg.LinAlgError:
invert_sig_t(sig_t, lamb*1.1, rcond*1.1)
invert_sig_t(sig_t, lamb*1.1, rtol*1.1)
def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rtol=0.01, batch=True):
"""
Compute the variance
Args:
......@@ -85,10 +85,10 @@ def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
correlation
sig_i_t (np.ndarray): correlation matrix of known matrix
lamb (float): regularization term added to the diagonal of the sig_t matrix
rcond (float): threshold to filter eigenvector with a eigenvalue under rcond
rtol (float): threshold to filter eigenvector with a eigenvalue under rtol
make inversion biased but much more numerically robust
"""
sig_t_inv = invert_sig_t(sig_t, lamb, rcond)
sig_t_inv = invert_sig_t(sig_t, lamb, rtol)
if sig_t_inv is None:
return None
else:
......
......@@ -133,7 +133,7 @@ def empty_imputed_dataframe():
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
rcond, ld_file_type = "plink"):# unknowns=pd.Series([])):
rtol, ld_file_type = "plink"):# unknowns=pd.Series([])):
"""
Each missing Snp is imputed by known snps found in a window
Argument.
......@@ -175,7 +175,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
if(len(known) > 0):
print("Imputation for window {0} - {1}".format(start_windows,end_windows ))
imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rcond=rcond, batch=True)
imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rtol=rtol, batch=True)
try:
batch_df = format_result_df(imp, ref_panel, known, unknowns)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment