imputation_launcher.py 1.39 KB
Newer Older
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
1
2
3
4
5
"""
Set of functions to launch imputation on a complete chromosome or
on the whole genome
"""
import glob
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
6
from .windows import ld_region_centered_window_imputation, impg_like_imputation, realigned_zfiles_on_panel
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
7

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
8
class ImputationLauncher(object):
    """
    Hold the imputation settings and run the imputation over every
    LD file of a chromosome.

    Two imputation styles are supported:

    * "online": each LD file is handled by
      ``ld_region_centered_window_imputation``
    * "batch": each LD file is handled by ``impg_like_imputation``,
      which additionally receives ``buffer``
    """

    def __init__(self, window_size=10000, imputation_style="online", buffer=2500):
        """
        Store the imputation settings.

        :param window_size: value forwarded as-is to the imputation function
        :param imputation_style: "online" or "batch"; checked when
            ``chromosome_imputation`` is called, not here
        :param buffer: value forwarded as-is to ``impg_like_imputation``
            (only used by the "batch" style)
        """
        self.imputation_style = imputation_style
        self.window_size = window_size
        self.buffer = buffer

    def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
        """
        Run the configured imputation on every LD file of one chromosome.

        The LD files are the ones matching ``<ld_folder>/<chrom>*.ld``.
        They are processed one after the other: the z-scores returned for
        one file are the input of the next one. Once all files are done,
        the result is passed through ``realigned_zfiles_on_panel``.

        :param chrom: chromosome identifier, used as the LD file name prefix
        :param zscore: z-scores to impute from (passed to the imputation
            function unchanged for the first LD file)
        :param ref_panel: reference panel, forwarded to the imputation
            function and to ``realigned_zfiles_on_panel``
        :param ld_folder: folder containing the ``.ld`` files
        :return: the value returned by ``realigned_zfiles_on_panel``
        :raises ValueError: if ``self.imputation_style`` is neither
            "online" nor "batch"
        """
        style = self.imputation_style

        # The running z-scores are passed explicitly to the per-file function
        # so that each LD file sees the output of the previous one.
        if style == "online":
            def impute_region(ld_file, current_zscore):
                return ld_region_centered_window_imputation(
                    ld_file, ref_panel, current_zscore, self.window_size)
        elif style == "batch":
            def impute_region(ld_file, current_zscore):
                return impg_like_imputation(
                    ld_file, ref_panel, current_zscore, self.window_size, self.buffer)
        else:
            # Fail early with a clear message instead of hitting an undefined
            # imputation function later on (or silently skipping imputation).
            raise ValueError(
                "unknown imputation_style {0!r}; expected 'online' or 'batch'".format(style))

        # NOTE(review): a chrom prefix such as "chr1" would also match files
        # starting with "chr10", "chr11", ... depending on how the LD files
        # are named -- confirm the naming scheme.
        pattern = "{0}/{1}*.ld".format(ld_folder, chrom)

        # glob gives no ordering guarantee; sort so that the processing order
        # (and thus the chained result) is the same from one run to the next.
        for ld_file in sorted(glob.glob(pattern)):
            print("processing Region: {0}".format(ld_file))
            zscore = impute_region(ld_file, zscore)

        zscore = realigned_zfiles_on_panel(ref_panel, zscore)

        return zscore