Skip to content
Snippets Groups Projects
Commit 31083099 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

Merge branch 'dev' into 'master'

Dev

See merge request !11
parents 419e0cd9 80dae27d
No related branches found
No related tags found
1 merge request!11Dev
Pipeline #120388 failed
...@@ -15,7 +15,7 @@ class ImputationLauncher(object): ...@@ -15,7 +15,7 @@ class ImputationLauncher(object):
Class to perform imputation of snp from summary statistic Class to perform imputation of snp from summary statistic
""" """
def __init__(self, window_size=10000, buf=2500, def __init__(self, window_size=10000, buf=2500,
lamb= 0.01, pinv_rcond = 0.01, ld_type="plink"): lamb= 0.01, pinv_rtol = 0.01, ld_type="plink"):
""" """
Initialise the imputation object. Fix the windows size, the buffer size Initialise the imputation object. Fix the windows size, the buffer size
and the kind of imputation employed and the kind of imputation employed
...@@ -26,14 +26,14 @@ class ImputationLauncher(object): ...@@ -26,14 +26,14 @@ class ImputationLauncher(object):
imputation (relevant only for batch imputation) imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation lamb (float): size of the increment added to snp correlation
matrices to make it less singular matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument. pinv_rtol (float): the rtol scipy.linalg.pinv function argument.
The scipy.linalg.pinv is used to invert the correlation matrices The scipy.linalg.pinv is used to invert the correlation matrices
""" """
self.window_size = window_size self.window_size = window_size
self.buffer = buf self.buffer = buf
self.lamb = lamb self.lamb = lamb
self.rcond = pinv_rcond self.rtol = pinv_rtol
self.ld_type = ld_type self.ld_type = ld_type
def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder): def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
...@@ -63,7 +63,7 @@ class ImputationLauncher(object): ...@@ -63,7 +63,7 @@ class ImputationLauncher(object):
def imputer(ld_file): def imputer(ld_file):
return impg_like_imputation(ld_file, ref_panel, zscore, return impg_like_imputation(ld_file, ref_panel, zscore,
self.window_size, self.buffer, self.window_size, self.buffer,
self.lamb, self.rcond, self.ld_type) self.lamb, self.rtol, self.ld_type)
ld_file_list = set(map(get_file_name, glob.glob(pattern))) ld_file_list = set(map(get_file_name, glob.glob(pattern)))
for ld_file in ld_file_list: for ld_file in ld_file_list:
......
...@@ -28,7 +28,7 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size, ...@@ -28,7 +28,7 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size,
print("Imputation of {0} gwas for chromosome {1}".format(gwas, chrom)) print("Imputation of {0} gwas for chromosome {1}".format(gwas, chrom))
# Imputer settings # Imputer settings
imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rcond = float(eigen_threshold), ld_type = ld_type) imputer = ImputationLauncher( window_size=int(window_size), buf=int(buffer_size), lamb= float(l2_regularization), pinv_rtol = float(eigen_threshold), ld_type = ld_type)
# Reading of inputs # Reading of inputs
z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom) z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom)
zscore = pd.read_csv(z_file, index_col=0, sep="\t") zscore = pd.read_csv(z_file, index_col=0, sep="\t")
......
...@@ -77,9 +77,22 @@ def report_snps(res_dict, trait, filename="raiss_report"): ...@@ -77,9 +77,22 @@ def report_snps(res_dict, trait, filename="raiss_report"):
f.write(res_dict['R2_quality'].to_string()) f.write(res_dict['R2_quality'].to_string())
f.close() f.close()
def report_snps_csv(res_dict, trait, filename="raiss_report"):
# I save the log in three .csv files.
df = pd.DataFrame({'Number of SNPs in harmonized file': [res_dict["total_harmonized"]],
'Number of SNPs in imputed file': [res_dict["total_imputed"]],
'Proportion imputed': [res_dict['proportion']]})
df.to_csv("{}_{}_Number_of_SNPs.csv".format(filename, trait), index=False)
# Number of SNPs by level of significance
res_dict['signif_count'].to_csv("{}_{}_Number_of_SNPs_by_level_of_significance.csv".format(filename, trait))
# Number of imputed SNPs by level of imputation quality
res_dict['R2_quality'].to_csv = "{}_{}_Number_of_SNPs_by_level_of_imputation_quality.csv".format(filename, trait)
def raiss_report(trait, harmonized_folder, imputed_folder, filename="raiss_report", chr_list=range(1,23)): def raiss_report(trait, harmonized_folder, imputed_folder, filename="raiss_report", chr_list=range(1,23)):
""" """
Function to compute a report for one trait Function to compute a report for one trait
""" """
res_dict = count_snps(trait, harmonized_folder, imputed_folder, chr_list) res_dict = count_snps(trait, harmonized_folder, imputed_folder, chr_list)
report_snps(res_dict, trait, filename) report_snps_csv(res_dict, trait, filename)
...@@ -68,15 +68,15 @@ def var_in_boundaries(var,lamb): ...@@ -68,15 +68,15 @@ def var_in_boundaries(var,lamb):
return var return var
def invert_sig_t(sig_t, lamb, rcond): def invert_sig_t(sig_t, lamb, rtol):
try: try:
np.fill_diagonal(sig_t.values, (1+lamb)) np.fill_diagonal(sig_t.values, (1+lamb))
sig_t_inv = scipy.linalg.pinv(sig_t, rcond=rcond) sig_t_inv = scipy.linalg.pinv(sig_t, rtol=rtol,atol=0)
return(sig_t_inv) return(sig_t_inv)
except np.linalg.LinAlgError: except np.linalg.LinAlgError:
invert_sig_t(sig_t, lamb*1.1, rcond*1.1) invert_sig_t(sig_t, lamb*1.1, rtol*1.1)
def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rtol=0.01, batch=True):
""" """
Compute the variance Compute the variance
Args: Args:
...@@ -85,10 +85,10 @@ def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True): ...@@ -85,10 +85,10 @@ def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
correlation correlation
sig_i_t (np.ndarray): correlation matrix of known matrix sig_i_t (np.ndarray): correlation matrix of known matrix
lamb (float): regularization term added to the diagonal of the sig_t matrix lamb (float): regularization term added to the diagonal of the sig_t matrix
rcond (float): threshold to filter eigenvector with a eigenvalue under rcond rtol (float): threshold to filter eigenvector with a eigenvalue under rtol
make inversion biased but much more numerically robust make inversion biased but much more numerically robust
""" """
sig_t_inv = invert_sig_t(sig_t, lamb, rcond) sig_t_inv = invert_sig_t(sig_t, lamb, rtol)
if sig_t_inv is None: if sig_t_inv is None:
return None return None
else: else:
......
...@@ -133,7 +133,7 @@ def empty_imputed_dataframe(): ...@@ -133,7 +133,7 @@ def empty_imputed_dataframe():
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
rcond, ld_file_type = "plink"):# unknowns=pd.Series([])): rtol, ld_file_type = "plink"):# unknowns=pd.Series([])):
""" """
Each missing Snp is imputed by known snps found in a window Each missing Snp is imputed by known snps found in a window
Argument. Argument.
...@@ -175,7 +175,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, ...@@ -175,7 +175,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
if(len(known) > 0): if(len(known) > 0):
print("Imputation for window {0} - {1}".format(start_windows,end_windows )) print("Imputation for window {0} - {1}".format(start_windows,end_windows ))
imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rcond=rcond, batch=True) imp = raiss_model(zt, sig_t, sig_i_t, lamb=lamb, rtol=rtol, batch=True)
try: try:
batch_df = format_result_df(imp, ref_panel, known, unknowns) batch_df = format_result_df(imp, ref_panel, known, unknowns)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment