Commit 4b88f317 authored by hjulienn
Browse files

update imputation_R2

parent ee6223d1
Pipeline #48331 passed with stages
in 1 minute and 11 seconds
......@@ -29,13 +29,14 @@ def generated_test_data(zscore, N_to_mask=5000, condition=None, stratifying_vec
if isinstance(condition, pd.Series)==True:
masked = np.random.choice(zscore.index[condition], N_to_mask, replace=False)
else:
inter_id = zscore.index.intersection(stratifying_vector.index).drop_duplicates(keep='first', inplace=True)
stratifying_vector = stratifying_vector.loc[inter_id]
if isinstance(stratifying_vector, pd.Series)==True:
masked = []
binned = np.digitize(stratifying_vector, stratifying_bins)
N_bins = len(stratifying_vector)-1
for i in range(N_bins):
masked = masked + list(np.random.choice(zscore.index[(binned==(i+1))], N_to_mask // N_bins, replace=False))
masked = masked + list(np.random.choice(inter_id[(binned==(i+1))], N_to_mask // N_bins, replace=False))
masked = np.array(masked)
else:
masked = np.random.choice(zscore.index, N_to_mask, replace=False)
......@@ -100,7 +101,7 @@ def z_amplitude_effect(zscore_folder, masked_folder, output_folder, ref_folder,
z_output = "{0}/z_{1}_{2}.txt".format(output_folder, gwas, chrom)
dat_orig = pd.read_csv(z_file, sep="\t", index_col=0)
dat_orig.drop_duplicates(keep='first', inplace=True)
def run_imputation(z, i):
print("Z score threshold : {}".format(z))
condition = (dat_orig.Z.abs() > z)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment