Commit 47cb3a06 authored by Hanna JULIENNE

add parameter to specify reference panel suffix

parent af5f978a
Pipeline #11204 passed with stages
in 1 minute and 13 seconds
......@@ -52,7 +52,7 @@ def imputation_performance(zscore_initial, zscore_imputed, masked):
res = np.nan
return {'fraction_imputed': np.nan, 'cor':np.nan}
def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder, ld_folder, gwas, z_treshold = [0, 1.0, 2.0, 3.0, 4.0, 5], window_size= 500000, buffer_size=125000, eigen_ratio = 0.1, chrom="chr22", l2_regularization=0.1, R2_threshold=0.6):
def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder, ld_folder, gwas, z_treshold = [0, 1.0, 2.0, 3.0, 4.0, 5], window_size= 500000, buffer_size=125000, eigen_ratio = 0.1, chrom="chr22", l2_regularization=0.1, R2_threshold=0.6, N_to_mask=5000):
"""
Compute the imputation performance on SNPs with different amplitude
The procedure is the following:
......@@ -73,6 +73,7 @@ def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder,
z_treshold (list) : float list to select Z score to mask above Z score
eigen_ratio (float): rcond parameter (must be between 0 and 1)
window_size, buffer_size, l2_regularization, R2_threshold : imputation parameter (see raiss command line documentation)
N_to_mask (int): Number of SNPs masked in the initial dataset to compute the correlation between true value and imputed value
"""
z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom)
zscore = pd.read_csv(z_file, index_col=0, sep="\t")
......@@ -107,7 +108,7 @@ def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder,
return(R2_serie)
def grid_search(zscore_folder, masked_folder, output_folder, ref_folder, ld_folder, gwas, chrom="chr22", eigen_ratio_grid = [0.5, 0.1, 0.01], window_size= 500000, buffer_size=125000, l2_regularization=0.1, R2_threshold=0.6):
def grid_search(zscore_folder, masked_folder, output_folder, ref_folder, ld_folder, gwas, chrom="chr22", eigen_ratio_grid = [0.5, 0.1, 0.01], window_size= 500000, buffer_size=125000, l2_regularization=0.1, R2_threshold=0.6, N_to_mask=5000):
"""
Compute the imputation performance for several eigen ratios.
The procedure is the following:
......@@ -126,13 +127,14 @@ def grid_search(zscore_folder, masked_folder, output_folder, ref_folder, ld_fold
chrom (str): chromosome in the format "chr.."
eigen_ratio_grid (list): list of eigen_ratio to test (must be between 0 and 1)
window_size, buffer_size, l2_regularization, R2_threshold : imputation parameter (see raiss command line documentation)
N_to_mask (int): Number of SNPs masked in the initial dataset to compute the correlation between true value and imputed value
"""
z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom)
z_output = "{0}/z_{1}_{2}.txt".format(output_folder, gwas, chrom)
dat_orig = pd.read_csv(z_file, sep="\t", index_col=0)
res_masked = generated_test_data(dat_orig)
res_masked = generated_test_data(dat_orig, N_to_mask)
z_masked = res_masked[0]
z_masked_file = "{0}/z_{1}_{2}.txt".format(masked_folder, gwas, chrom)
z_masked.to_csv(z_masked_file, sep="\t")
......@@ -145,8 +147,7 @@ def grid_search(zscore_folder, masked_folder, output_folder, ref_folder, ld_fold
n_cpu = multiprocessing.cpu_count()
Parallel(n_jobs=n_cpu)(delayed(run_imputation)(rd) for rd in eigen_ratio_grid)
R2_serie = pd.DataFrame({"cor":np.nan, "cor_on_imputed":np.nan,
"fraction_imputed":np.nan}, index = eigen_ratio_grid)
R2_serie = pd.DataFrame({"cor":np.nan, "fraction_imputed":np.nan}, index = eigen_ratio_grid)
for rd in eigen_ratio_grid:
z_output = "{0}/z_{1}_{2}_{3}.txt".format(output_folder, gwas, chrom, rd)
......
......@@ -7,7 +7,7 @@ import pandas as pd
from raiss.filter_format_output import filter_output
from raiss.imputation_launcher import ImputationLauncher
def save_chromosome_imputation(gwas, chrom, window_size, buffer_size, l2_regularization, eigen_threshold, zscore_folder, ref_folder, ld_folder, output_folder, R2_threshold, tag=""):
def save_chromosome_imputation(gwas, chrom, window_size, buffer_size, l2_regularization, eigen_threshold, zscore_folder, ref_folder, ld_folder, output_folder, R2_threshold, tag="", ref_panel_suffix=".eur.1pct.bim"):
"""
module to manage the creation of files to save the results of imputation
Args:
......@@ -29,7 +29,7 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size, l2_regular
# Reading of inputs
z_file = "{0}/z_{1}_{2}.txt".format(zscore_folder, gwas, chrom)
zscore = pd.read_csv(z_file, index_col=0, sep="\t")
ref_panel_file = ref_folder + "/"+ chrom +".eur.1pct.bim"
ref_panel_file = ref_folder + "/"+ chrom + ref_panel_suffix
ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
# imputation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment