imputation_launcher.py 3.25 KB
Newer Older
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
1
2
3
4
5
"""
Function set to launch imputation on a complete chromosome or
on the genome
"""
import glob
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
6
import pandas as pd
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
7
from .windows import ld_region_centered_window_imputation, impg_like_imputation, realigned_zfiles_on_panel
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
8

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
9
class ImputationLauncher(object):
    """
    Launch the imputation of SNP z-scores from summary statistics.

    An instance only stores the imputation parameters.
    `chromosome_imputation` imputes one chromosome region by region and
    `genome_imputation` loops over chromosomes 1 to 22, reading its inputs
    from disk and writing one result file per chromosome.
    """

    def __init__(self, window_size=10000, imputation_style="online", buf=2500, lamb=0.01, pinv_rcond=0.01):
        """
        Store the imputation parameters.

        Args:
            window_size (int): size of the imputation window in bp
            imputation_style (str): define if the windows will span the genome in a non overlapping
                                    fashion ("batch") or by being centered on each snp to impute ("online")
            buf (int): the size of the padding around the windows of imputation
                       (relevant only for batch imputation); stored as `self.buffer`
            lamb (float): size of the increment added to snp correlation matrices to make them less singular
                          (passed on only for batch imputation)
            pinv_rcond (float): the rcond argument of the scipy.linalg.pinv function, which is used to invert
                                the correlation matrices (passed on only for batch imputation);
                                stored as `self.rcond`
        """
        self.imputation_style = imputation_style
        self.window_size = window_size
        self.buffer = buf
        self.lamb = lamb
        self.rcond = pinv_rcond

    @staticmethod
    def _is_region_of_chromosome(ld_file, chrom):
        """
        Tell whether a file matched by "<chrom>*.ld" really belongs to `chrom`.

        The glob pattern "chr1*.ld" also matches "chr10_....ld": when `chrom`
        ends with a digit and the file name continues with another digit, the
        file is a region of a different chromosome and must be skipped.

        Args:
            ld_file (str): path of a candidate LD file
            chrom (str): chromosome name used as file name prefix
        Returns:
            bool: False only for the ambiguous "digit followed by digit" case
        """
        import os  # function-scope import: keeps this class self-contained

        name = os.path.basename(ld_file)
        following = name[len(chrom):len(chrom) + 1]
        is_other_chromosome = (
            chrom[-1:].isdigit() and name.startswith(chrom) and following.isdigit()
        )
        return not is_other_chromosome

    def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
        """
        Impute the panel z-scores for one chromosome with the stored parameters.

        Every "<ld_folder>/<chrom>*.ld" file is processed in turn; the z-scores
        returned for one region are the input of the next one, and the final
        result is realigned on the reference panel.

        Args:
            chrom (str): chromosome name, used as prefix of the LD file names
            zscore: known z-scores
            ref_panel: reference panel of the chromosome
            ld_folder (str): location of the linkage disequilibrium matrices
        Returns:
            Imputed z-score dataframe
        Raises:
            ValueError: if `self.imputation_style` is neither "online" nor "batch"
        """
        # Pick the region-level routine first so that a bad style fails fast,
        # before any file is touched.
        if self.imputation_style == "online":
            def imputer(ld_file, current_zscore):
                return ld_region_centered_window_imputation(ld_file, ref_panel, current_zscore, self.window_size)
        elif self.imputation_style == "batch":
            def imputer(ld_file, current_zscore):
                return impg_like_imputation(ld_file, ref_panel, current_zscore, self.window_size,
                                            self.buffer, self.lamb, self.rcond)
        else:
            raise ValueError(
                "Unknown imputation_style {0!r}: expected 'online' or 'batch'".format(self.imputation_style)
            )

        pattern = "{0}/{1}*.ld".format(ld_folder, chrom)
        # glob returns files in arbitrary order: sort (lexicographically) so
        # that runs are reproducible, and drop files of other chromosomes
        # sharing the same prefix (e.g. chr10 when imputing chr1).
        ld_files = sorted(
            path for path in glob.glob(pattern)
            if self._is_region_of_chromosome(path, chrom)
        )

        for ld_file in ld_files:
            print("processing Region: {0}".format(ld_file))
            zscore = imputer(ld_file, zscore)

        zscore = realigned_zfiles_on_panel(ref_panel, zscore)
        return zscore

    def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
        """
        Launch imputation on chromosomes 1 to 22 and write one file per chromosome.

        For each chromosome number i, reads the reference panel
        "<ref_panel_folder>/chr<i>.eur.1pct.bim" and the known z-scores
        "<zscore_folder>/z_<gwas_tag>_chr<i>.txt" (both tab separated), imputes
        the chromosome and writes the result, tab separated, to
        "<folder_output>/z_<gwas_tag>_chr<i>.txt".

        Args:
            gwas_tag (str): tag identifying the study in the z-score file names
            ref_panel_folder (str): folder containing the reference panel files
            ld_folder (str): folder containing the LD files (see `chromosome_imputation`)
            zscore_folder (str): folder containing the known z-score files
            folder_output (str): folder where the imputed z-score files are written
        """
        for i in range(1, 23):
            chrom = "chr" + str(i)
            zscore_name = "z_{0}_{1}.txt".format(gwas_tag, chrom)

            # The panel file has no header; its second column is used as index.
            ref_panel_file = "{0}/{1}.eur.1pct.bim".format(ref_panel_folder, chrom)
            ref_panel = pd.read_csv(ref_panel_file, sep="\t",
                                    names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col=1)

            known_zscore_file = "{0}/{1}".format(zscore_folder, zscore_name)
            known_zscore = pd.read_csv(known_zscore_file, index_col=0, sep="\t")

            z_imp = self.chromosome_imputation(chrom, known_zscore, ref_panel, ld_folder)

            imputed_zscore = "{0}/{1}".format(folder_output, zscore_name)
            z_imp.to_csv(imputed_zscore, sep="\t")