Commit 98125ff6 authored by Hanna JULIENNE
Browse files

improved performance report

parent 8f919ee4
......@@ -67,17 +67,26 @@ def imputation_performance(zscore_initial, zscore_imputed, masked):
masked : SNPs ids which have been masked by imputation
"""
try:
N_masked = len(masked)
masked = zscore_imputed.index.intersection(masked)
fraction_imputed = 1.0-zscore_imputed.loc[masked, "Z"].isnull().mean()
N_imputed = len(masked)
fraction_imputed = N_imputed / N_masked
#cor = zscore_initial.loc[masked, "Z"].corr(zscore_imputed.loc[masked, "Z"].fillna(0))
cor = zscore_initial.loc[masked, "Z"].corr(zscore_imputed.loc[masked, "Z"])
MAE = (zscore_initial.loc[masked, "Z"] - zscore_imputed.loc[masked, "Z"]).dropna().abs().mean()
return {'N_SNP':len(masked),'fraction_imputed':fraction_imputed, 'cor':cor, 'mean_absolute_error':MAE}
except KeyError:
error_abs= (zscore_initial.loc[masked, "Z"] - zscore_imputed.loc[masked, "Z"]).dropna().abs()
MAE = error_abs.mean()
error_quantile = error_abs.quantile([0,0.5,1])
SNP_max_error = error_abs.idxmax()
return {'N_SNP':len(masked),'fraction_imputed':fraction_imputed, 'cor':cor,
'mean_absolute_error':MAE, 'median_absolute_error':error_quantile.loc[0.5],
'min_absolute_error':error_quantile.loc[0.0],'max_absolute_error':error_quantile.loc[1.0], "SNP_max_error":SNP_max_error}
except (ValueError,KeyError) as e:
print(e) # If KeyError none of the masked_SNP are in the imputed dataframe
res = np.nan
return {'N_SNP':np.nan, 'fraction_imputed': np.nan, 'cor':np.nan, 'mean_absolute_error':np.nan}
return {'N_SNP':np.nan, 'fraction_imputed': np.nan, 'cor':np.nan, 'mean_absolute_error':np.nan, 'median_absolute_error':np.nan,
'min_absolute_error':np.nan,'max_absolute_error':np.nan, "SNP_max_error":np.nan}
def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder,
ld_folder, gwas,ref_panel_preffix="",ref_panel_suffix=".eur.1pct.bim",
......@@ -192,7 +201,8 @@ def grid_search(zscore_folder, masked_folder, output_folder,
n_cpu = multiprocessing.cpu_count()
Parallel(n_jobs=n_cpu)(delayed(run_imputation)(rd) for rd in eigen_ratio_grid)
R2_serie = pd.DataFrame({"cor":np.nan, "mean_absolute_error":np.nan, "fraction_imputed":np.nan}, index = eigen_ratio_grid)
R2_serie = pd.DataFrame({'N_SNP':np.nan, 'fraction_imputed': np.nan, 'cor':np.nan, 'mean_absolute_error':np.nan, 'median_absolute_error':np.nan,
'min_absolute_error':np.nan,'max_absolute_error':np.nan, "SNP_max_error":np.nan}, index = eigen_ratio_grid)
for rd in eigen_ratio_grid:
z_output = "{0}/z_{1}_{2}_{3}.txt".format(output_folder, gwas, chrom, rd)
......@@ -204,10 +214,15 @@ def grid_search(zscore_folder, masked_folder, output_folder,
print(e)
res = np.nan
R2_serie.loc[rd, 'N_SNP'] = res["N_SNP"]
R2_serie.loc[rd, 'cor'] = res["cor"]
R2_serie.loc[rd, 'mean_absolute_error'] = res["mean_absolute_error"]
R2_serie.loc[rd, 'fraction_imputed'] = res["fraction_imputed"]
R2_serie.loc[rd, 'median_absolute_error'] = res["median_absolute_error"]
R2_serie.loc[rd, 'min_absolute_error'] = res["min_absolute_error"]
R2_serie.loc[rd, 'max_absolute_error'] = res["max_absolute_error"]
R2_serie.loc[rd, 'SNP_max_error'] = res["SNP_max_error"]
print(len(masked_SNP))
print("Result for rd {0} = cor: {1}, fraction_imputed: {2}".format(rd, res["cor"], res["fraction_imputed"] ))
print("Result for rd {0} = cor: {1}, fraction_imputed: {2}".format(rd, res["cor"], res["fraction_imputed"]))
return(R2_serie)
......@@ -63,7 +63,7 @@ def var_in_boundaries(var,lamb):
id_neg = np.where(var < 0)
var_norm = var
var[id_neg] = 0
id_inf = np.where(var > (1+lamb))
id_inf = np.where(var > (0.99999+lamb))
var[id_inf] = 1
return var
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment