Commit d8d5e567 authored by Hanna JULIENNE
Browse files

imputation launcher class

parent 3d3d5eef
import impute_jass.ld_matrix as LD import impute_jass.ld_matrix as LD
import impute_jass.stat_models as model import impute_jass.stat_models as model
import impute_jass.windows import impute_jass.windows
from impute_jass.imputation_launcher import imputation_launcher
"""
Set of functions to launch imputation on a complete chromosome or
on the genome
"""
import glob
from .windows import Ld_region_centered_window_imputation
class imputation_launcher:
    """
    Run the window-based imputation over every LD file of a chromosome.

    Attributes
    ----------
    imputation_style : str
        Set to "online" by the constructor; not read by any method of
        this class.
    window_size : int
        Forwarded unchanged to Ld_region_centered_window_imputation for
        every LD file.
        NOTE(review): presumably expressed in the same unit as the 'pos'
        column of the reference panel -- confirm against the windows module.
    """

    def __init__(self, window_size=10000):
        """
        Store the imputation settings.

        Parameters
        ----------
        window_size : int, optional
            Window size handed to the per-region imputation (default 10000).
        """
        self.imputation_style = "online"
        self.window_size = window_size

    def chromosome_imputation(self, chrom, Zscores, ref_panel, ld_folder):
        """
        Impute Z-scores for every LD file of `chrom` found in `ld_folder`.

        The files matching "<ld_folder>/<chrom>*.ld" are processed one after
        the other; the Z-scores returned for one file are the input of the
        next one.

        Parameters
        ----------
        chrom : str
            Prefix of the LD file names to process.
        Zscores : object
            Z-score table forwarded to, and replaced by the result of,
            Ld_region_centered_window_imputation.
        ref_panel : object
            Reference panel forwarded as-is to the per-region imputation.
        ld_folder : str
            Folder that contains the ".ld" files.

        Returns
        -------
        The Z-scores returned by the last processed LD file, or the input
        `Zscores` untouched when no file matches the pattern.
        """
        # NOTE(review): `chrom` is used as a bare file-name prefix, so a
        # value such as "chr1" would also match "chr10..." files if the
        # folder mixes chromosomes -- confirm the LD file naming scheme.
        pattern = "{0}/{1}*.ld".format(ld_folder, chrom)

        # glob returns matches in arbitrary order; sorting makes the
        # processing order (and the progress output) reproducible.
        for LD_file in sorted(glob.glob(pattern)):
            print("processing Region: {0}".format(LD_file))
            Zscores = Ld_region_centered_window_imputation(
                LD_file, ref_panel, Zscores, self.window_size
            )
        return Zscores
...@@ -54,15 +54,13 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores): ...@@ -54,15 +54,13 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores):
def in_region(pos_vector, start, end):
    """
    Tell which positions lie strictly between `start` and `end`.

    Both bounds are exclusive: a position equal to `start` or to `end` is
    reported as outside the region.

    Parameters
    ----------
    pos_vector : scalar or array-like supporting `<` and `&`
        Position(s) to test.
    start : number
        Lower bound (excluded).
    end : number
        Upper bound (excluded).

    Returns
    -------
    The result of `(start < pos_vector) & (pos_vector < end)`: element-wise
    for array-like input, a single boolean for a scalar.
    """
    return ((start < pos_vector) & (pos_vector < end))
def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size, unknowns=pd.Series([])): def Ld_region_centered_window_imputation(LD_file, ref_panel, Zscores, window_size, unknowns=pd.Series([])):
""" """
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument Argument
""" """
(chrom, start_ld_block, end_ld_block) = parse_region_position(LD_file) (chrom, start_ld_block, end_ld_block) = parse_region_position(LD_file)
ref_panel_file = "/mnt/atlas/PCMA/1._DATA/ImpG_refpanel/{0}.eur.1pct.bim".format(chrom)
ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
LD_mat = generate_sparse_matrix(LD_file, ref_panel) LD_mat = generate_sparse_matrix(LD_file, ref_panel)
...@@ -79,9 +77,9 @@ def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, win ...@@ -79,9 +77,9 @@ def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, win
for snp_unknown in unknowns: for snp_unknown in unknowns:
# Boundary of the centered_window # Boundary of the centered_window
#print(((ref_panel.loc[snp_unknown,'pos'] - window_size), float(start_ld_block)))
start_pos = max((ref_panel.loc[snp_unknown,'pos'] - window_size), start_ld_block) start_pos = max((ref_panel.loc[snp_unknown,'pos'] - window_size), float(start_ld_block))
end_pos = min(ref_panel.loc[snp_unknown,'pos'] + window_size, end_ld_block) end_pos = min(ref_panel.loc[snp_unknown,'pos'] + window_size, float(end_ld_block))
#print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block) #print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block)
in_LD_reg_n_window = in_region(Zscores.pos, start_pos, end_pos) in_LD_reg_n_window = in_region(Zscores.pos, start_pos, end_pos)
...@@ -102,4 +100,5 @@ def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, win ...@@ -102,4 +100,5 @@ def Ld_region_centered_window_imputation(LD_file, ref_panel_folder, Zscores, win
i = i+1 i = i+1
if i%100 == 0: if i%100 == 0:
print("{0}\%".format(np.round(i/N_snp,4))) print("{0}\%".format(np.round(i/N_snp,4)))
print(Zscores.head(10))
return Zscores.sort_values(by="pos") return Zscores.sort_values(by="pos")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment