Commit da779b03 authored by Hanna  JULIENNE's avatar Hanna JULIENNE

added comments

parent f07a787a
......@@ -3,11 +3,27 @@ Function set to launch imputation on a complete chromosome or
on the genome
"""
import glob
import pandas as pd
from .windows import ld_region_centered_window_imputation, impg_like_imputation, realigned_zfiles_on_panel
class ImputationLauncher(object):
"""
Class performing the imputation of SNPs from summary statistics
"""
def __init__(self, window_size=10000, imputation_style="online", buf=2500, lamb= 0.01, pinv_rcond = 0.01 ):
"""
Args:
window_size (int): size of the imputation window in bp
imputation_style (str): defines whether the windows span the genome in a non-overlapping fashion ("batch") or
are centered on each SNP to impute ("online")
buf (int): the size of the padding around the imputation windows (relevant only for batch imputation)
lamb (float): size of the increment added to SNP correlation matrices to make them less singular
pinv_rcond (float): the rcond argument of the scipy.linalg.pinv function, which is used to invert
the correlation matrices
"""
self.imputation_style = imputation_style
self.window_size = window_size
self.buffer = buf
......@@ -54,11 +70,11 @@ class ImputationLauncher(object):
ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
known_zscore_file = zscore_folder + "/z_" + tag + "chr" + str(i) + ".txt"
known_zscore_file = zscore_folder + "/z_" + gwas_tag + "_chr" + str(i) + ".txt"
known_zscore = pd.read_csv(known_zscore_file, index_col=0, sep="\t")
chrom = "chr"+str(i)
z_imp = self.chromosome_imputation(chrom, known_zscore, ref_panel, ld_folder)
imputed_zscore = folder_output + "/z_" + tag + "chr" + str(i) + ".txt"
imputed_zscore = folder_output + "/z_" + gwas_tag + "_chr" + str(i) + ".txt"
z_imp.to_csv(imputed_zscore, sep="\t")
......@@ -44,9 +44,7 @@ def generate_sparse_matrix(plink_ld, ref_chr_df):
mat_ld = mat_ld.reindex(index=un_index, columns=un_index)
mat_ld.fillna(0, inplace=True)
sym = np.maximum(mat_ld.values,mat_ld.values.transpose())
np.fill_diagonal(sym, 1.01)
mat_ld = pd.DataFrame(sym, index=mat_ld.index, columns=mat_ld.columns)
re_index = ref_chr_df.loc[mat_ld.index].sort_values(by="pos").index
......
......@@ -26,10 +26,9 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
Argument:
zt : (vector) the vector of known Z scores
"""
snps = sig_t.columns
sig_t = sig_t.values
np.fill_diagonal(sig_t, (1+lamb))
sig_t_inv = sc.linalg.pinv(sig_t, rcond=rcond)
sig_t_inv = sc.linalg.pinv(sig_t)#, rcond=rcond)
if batch:
condition_number = np.array([np.linalg.cond(sig_t)]*sig_i_t.shape[0])
......@@ -38,10 +37,9 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
condition_number = np.linalg.cond(sig_t)
correct_inversion = check_inversion(sig_t, sig_t_inv)
var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch)
mu = compute_mu(sig_i_t, sig_t_inv, zt)
if np.any(mu > 50):
if np.any(mu > 30):
print("ABERANT SNP SNiP ")
#mu = mu / (((1+lamb)-var)**0.5)
return({"var":var, "mu":mu, "ld_score": ld_score, "condition_number":condition_number, "correct_inversion":correct_inversion })
return({"var" : var, "mu" : mu, "ld_score" : ld_score, "condition_number" : condition_number, "correct_inversion":correct_inversion })
......@@ -10,4 +10,13 @@ setup(name='impute_jass',
#package_dir = {'': 'jass_preprocessing'},
packages= ['impute_jass'],
package_data = {'impute_jass':'./data/*.csv'},
zip_safe=False)
zip_safe=False
entry_points={
'console_scripts' : [
'impute_chromosome = '
]
}
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment