Commit 1090c897 authored by Hanna JULIENNE
Browse files

Merge branch 'bug_fix' into 'master'

Bug fix

See merge request !6
parents 9138e16a 45937810
Pipeline #51123 passed with stages
in 1 minute and 48 seconds
......@@ -17,7 +17,8 @@ def launch_chromosome_imputation(args):
save_chromosome_imputation(args.gwas, args.chrom, args.window_size,
args.buffer_size, args.l2_regularization, args.eigen_threshold,
args.zscore_folder, args.ref_folder, args.ld_folder, args.output_folder,
args.R2_threshold, ref_panel_suffix=args.ref_panel_suffix, ld_type=args.ld_type)
args.R2_threshold, ref_panel_suffix=args.ref_panel_suffix,
ld_type=args.ld_type, minimum_ld=args.minimum_ld)
def add_chromosome_imputation_argument():
......@@ -38,6 +39,7 @@ def add_chromosome_imputation_argument():
parser.add_argument('--R2-threshold', help= "R square (imputation quality) threshold bellow which SNPs are filtered from the output", default = 0.6)
parser.add_argument("--ld-type", help= "Ld can be supplied as plink command --ld-snp-list output files (see raiss.ld_matrix.launch_plink_ld to compute these data using plink) or as a couple of a scipy sparse matrix (.npz )and an .csv containing SNPs index", default="plink")
parser.add_argument('--ref-panel-suffix', help= "end of the suffix for the reference panel files", default = ".bim")
parser.add_argument('--minimum-ld', help = "this parameter ensure that their is enough typed SNPs around the imputed to perform a high accuracy imputation", default = 4)
parser.set_defaults(func=launch_chromosome_imputation)
return(parser)
......
......@@ -4,7 +4,7 @@
"""
def filter_output(zscores, fout, R2_threshold = 0.6, minimum_ld = 20):
def filter_output(zscores, fout, R2_threshold = 0.6, minimum_ld = 5):
"""
procedure that format output for JASS
......@@ -20,5 +20,6 @@ def filter_output(zscores, fout, R2_threshold = 0.6, minimum_ld = 20):
chr_fo = zscores[['index', 'pos', 'A0', 'A1', 'Z', 'Var', "ld_score"]]
chr_fo["imputation_R2"] = 1-chr_fo["Var"]
chr_fo.columns = ['rsID','pos','A0','A1','Z', 'Var', "ld_score", "imputation_R2"]
chr_fo.loc[(chr_fo.Var < (1-R2_threshold)) & (chr_fo.ld_score > minimum_ld)].to_csv(fout, sep="\t", index=False)
print(chr_fo.shape)
print(chr_fo.loc[(chr_fo.imputation_R2 > R2_threshold) & (chr_fo.ld_score > float(minimum_ld))].shape)
chr_fo.loc[(chr_fo.imputation_R2 > R2_threshold) & (chr_fo.ld_score > float(minimum_ld))].to_csv(fout, sep="\t", index=False)
......@@ -39,6 +39,7 @@ def generated_test_data(zscore, N_to_mask=5000, condition=None, stratifying_vec
print(np.unique(binned))
print(inter_id[(binned==(1))])
print(N_to_mask // N_bins)
for i in range(N_bins):
print(i)
print(np.where(binned==(i+1)))
......
......@@ -74,17 +74,18 @@ def load_plink_ld(plink_ld, ref_chr_df):
plink_ld = plink_ld + ".ld"
plink_ld = pd.read_csv(plink_ld, sep = "\s+")
mat_ld = plink_ld.pivot(index='SNP_A', columns='SNP_B', values='R')
un_index = mat_ld.index.union(mat_ld.columns)
un_index = mat_ld.index.intersection(mat_ld.columns)
mat_ld = mat_ld.reindex(index=un_index, columns=un_index)
mat_ld.fillna(0, inplace=True)
sym = np.where(np.abs(mat_ld.values) > np.abs(mat_ld.values.transpose()), mat_ld.values, mat_ld.values.transpose())
mat_ld = pd.DataFrame(sym, index=mat_ld.index, columns=mat_ld.columns)
int_index = ref_chr_df.index.intersection(mat_ld.index)
print(int_index)
re_index = ref_chr_df.loc[int_index].sort_values(by="pos").index
mat_ld = mat_ld.loc[re_index, re_index]
print(mat_ld.iloc[1:5, 1:5])
return mat_ld
def load_sparse_matrix(path_sparse_LD, ref_chr_df):
......@@ -120,7 +121,6 @@ def generate_genome_matrices(region_files, reffolder, folder_output, suffix = ""
"""
regions = pd.read_csv(region_files)
for reg in regions.iterrows():
print(reg[0])
# input reference panel file
fi_ref = "{0}/{1}.{2}".format(reffolder, reg[1]['chr'], suffix)
......
......@@ -10,7 +10,7 @@ from raiss.imputation_launcher import ImputationLauncher
def save_chromosome_imputation(gwas, chrom, window_size, buffer_size,
l2_regularization, eigen_threshold, zscore_folder,
ref_folder, ld_folder, output_folder, R2_threshold,
tag="", ref_panel_suffix=".eur.1pct.bim", ld_type="plink"):
tag="", ref_panel_suffix=".eur.1pct.bim", ld_type="plink", minimum_ld=4):
"""
module to manage the creation of files to save the results of imputation
Args:
......@@ -43,5 +43,5 @@ def save_chromosome_imputation(gwas, chrom, window_size, buffer_size,
# and Saving results
minimum_ld = 20
z_fo = "{0}/z_{1}_{2}{3}.txt".format(output_folder, gwas, chrom, tag)
filter_output(imputed_zscore, z_fo, float(R2_threshold))
filter_output(imputed_zscore, z_fo, float(R2_threshold), minimum_ld=float(minimum_ld))
print("Save imputation done at {0}".format(z_fo))
......@@ -53,11 +53,6 @@ def compute_var(sig_i_t, sig_t_inv, lamb, batch=True):
return var, ld_score
def check_inversion(sig_t, sig_t_inv):
    """
    Check that sig_t_inv acts as a (pseudo-)inverse of sig_t.

    The check is the identity sig_t . sig_t_inv . sig_t == sig_t, compared
    with numpy.allclose using its default tolerances.

    Args:
        sig_t: the matrix that was inverted
        sig_t_inv: the candidate (pseudo-)inverse of sig_t
    Returns:
        bool: True when the triple product reproduces sig_t within tolerance
    """
    # No debug printing here: dumping both matrices on every call floods
    # stdout without changing the result of the check.
    return np.allclose(sig_t, np.dot(sig_t, np.dot(sig_t_inv, sig_t)))
def var_in_boundaries(var,lamb):
......@@ -109,5 +104,6 @@ def raiss_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
var_norm = var_in_boundaries(var, lamb)
R2 = ((1+lamb)-var_norm)
print(R2)
mu = mu / np.sqrt(R2)
return({"var" : var, "mu" : mu, "ld_score" : ld_score, "condition_number" : condition_number, "correct_inversion":correct_inversion })
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment