imputation_launcher.py 2.46 KB
Newer Older
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
1
2
3
4
5
"""
Functions to launch imputation on a complete chromosome or
on the whole genome
"""
import glob
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
6
from .windows import ld_region_centered_window_imputation, impg_like_imputation, realigned_zfiles_on_panel
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
7

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
8
class ImputationLauncher(object):
    """
    Hold the imputation parameters and run z-score imputation either on a
    single chromosome or on chromosomes 1 to 22.
    """

    def __init__(self, window_size=10000, imputation_style="online", buf=2500, lamb=0.01, pinv_rcond=0.01):
        """
        Store the imputation parameters.

        Args:
            window_size : window size forwarded to the imputation function
            imputation_style : "online" or "batch"; selects the imputation
                function used by chromosome_imputation
            buf : stored as self.buffer and forwarded to
                impg_like_imputation ("batch" style only)
            lamb : forwarded to impg_like_imputation ("batch" style only)
            pinv_rcond : stored as self.rcond and forwarded to
                impg_like_imputation ("batch" style only)
        """
        self.imputation_style = imputation_style
        self.window_size = window_size
        self.buffer = buf
        self.lamb = lamb
        self.rcond = pinv_rcond

    def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
        """
        Impute the panel zscore score for one chromosome and with the specified parameters

        Args:
            chrom : str specifying chromosome
            zscore : known zscore
            ref_panel : reference panel of the chromosome (passed as is to
                the imputation functions)
            ld_folder: location of linkage disequilibrium matrices
        Returns
            Imputed zscore dataframe
        Raises
            ValueError if self.imputation_style is neither "online" nor "batch"
        """
        # Select the imputation function first so that an unsupported style
        # fails immediately with a clear message instead of an unbound
        # `imputer` name in the middle of the loop.
        if self.imputation_style == "online":
            def imputer(ld_file, current_zscore):
                return ld_region_centered_window_imputation(ld_file, ref_panel, current_zscore, self.window_size)
        elif self.imputation_style == "batch":
            def imputer(ld_file, current_zscore):
                return impg_like_imputation(ld_file, ref_panel, current_zscore, self.window_size, self.buffer, self.lamb, self.rcond)
        else:
            raise ValueError(
                "Unknown imputation_style {0!r}: expected 'online' or 'batch'".format(self.imputation_style)
            )

        # NOTE(review): with chrom="chr1" this pattern also matches files
        # starting with "chr10" ... "chr19" -- confirm the LD file naming
        # convention and tighten the pattern if needed.
        pattern = "{0}/{1}*.ld".format(ld_folder, chrom)

        # glob returns files in arbitrary order; sort so that runs are
        # reproducible. The output of each region is fed to the next call.
        for ld_file in sorted(glob.glob(pattern)):
            print("processing Region: {0}".format(ld_file))
            zscore = imputer(ld_file, zscore)

        zscore = realigned_zfiles_on_panel(ref_panel, zscore)
        return zscore

    def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
        """
        Launch imputation on all chromosome (1 to 22)

        For each chromosome i, reads the reference panel from
        "<ref_panel_folder>/chr<i>.eur.1pct.bim" and the known zscores from
        "<zscore_folder>/z_<gwas_tag>chr<i>.txt", runs chromosome_imputation
        and writes the result to "<folder_output>/z_<gwas_tag>chr<i>.txt".

        Args:
            gwas_tag : str inserted in the zscore file names
            ref_panel_folder : folder containing the reference panel files
            ld_folder : location of linkage disequilibrium matrices
            zscore_folder : folder containing the known zscore files
            folder_output : folder where imputed zscore files are written
        """
        # Imported here because the module does not import pandas at the
        # top level.
        import pandas as pd

        for i in range(1, 23):
            chrom = "chr{0}".format(i)

            ref_panel_file = "{0}/{1}.eur.1pct.bim".format(ref_panel_folder, chrom)
            ref_panel = pd.read_csv(
                ref_panel_file,
                sep="\t",
                names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'],
                index_col=1,
            )

            known_zscore_file = "{0}/z_{1}{2}.txt".format(zscore_folder, gwas_tag, chrom)
            known_zscore = pd.read_csv(known_zscore_file, index_col=0, sep="\t")

            z_imp = self.chromosome_imputation(chrom, known_zscore, ref_panel, ld_folder)

            imputed_zscore = "{0}/z_{1}{2}.txt".format(folder_output, gwas_tag, chrom)
            z_imp.to_csv(imputed_zscore, sep="\t")