imputation_launcher.py 3.25 KB
Newer Older
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
1
2
3
4
5
"""
Function set to launch imputation on a complete chromosome or
on the genome
"""
import glob
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
6
import pandas as pd
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
7
from .windows import ld_region_centered_window_imputation, impg_like_imputation, realigned_zfiles_on_panel
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
8

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
9
10


Hanna  JULIENNE's avatar
Hanna JULIENNE committed
11
class ImputationLauncher(object):
    """
    Launch the imputation of SNP z-scores from summary statistics.

    The imputation parameters are stored once at construction time and then
    applied to every LD-region file of a chromosome
    (``chromosome_imputation``) or to chromosomes 1 to 22 in turn
    (``genome_imputation``).
    """

    # Values of ``imputation_style`` that ``chromosome_imputation`` can handle.
    _IMPUTATION_STYLES = ("online", "batch")

    def __init__(self, window_size=10000, imputation_style="batch", buf=2500, lamb=0.01, pinv_rcond=0.01):
        """
        Store the imputation parameters.

        Args:
            window_size (int): size of the imputation window in bp
            imputation_style (str): define if the windows span the genome in
                a non overlapping fashion ("batch") or are centered on each
                snp to impute ("online")
            buf (int): the size of the padding around the windows of
                imputation (relevant only for batch imputation); stored as
                ``self.buffer``
            lamb (float): size of the increment added to snp correlation
                matrices to make them less singular
            pinv_rcond (float): the rcond argument of scipy.linalg.pinv,
                which is used to invert the correlation matrices; stored as
                ``self.rcond``
        """
        self.imputation_style = imputation_style
        self.window_size = window_size
        self.buffer = buf
        self.lamb = lamb
        self.rcond = pinv_rcond

    @staticmethod
    def _ld_region_files(chrom, ld_folder):
        """
        List the ``.ld`` files of ``ld_folder`` that belong to ``chrom``.

        A plain ``<chrom>*.ld`` wildcard would also pick up the files of every
        chromosome whose name merely starts with ``chrom`` ("chr1" would match
        "chr10", "chr11", ...). The character following the chromosome name is
        therefore required to be a non-digit (or the file to be exactly
        ``<chrom>.ld``).

        Returns:
            list of str: matching paths, sorted so that the regions are
            always processed in the same order.
        """
        exact_name = "{0}/{1}.ld".format(ld_folder, chrom)
        with_suffix = "{0}/{1}[!0-9]*.ld".format(ld_folder, chrom)
        # The two patterns cannot match the same file, so no de-duplication
        # is needed before sorting.
        return sorted(glob.glob(exact_name) + glob.glob(with_suffix))

    def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
        """
        Impute the panel zscore score for one chromosome and with the specified parameters

        Args:
            chrom : str specifying chromosome (prefix of the ``.ld`` file names)
            zscore : known zscore
            ref_panel : reference panel of the chromosome, passed as is to
                the imputation functions
            ld_folder: location of linkage desiquilibrium matrices
        Returns
            Imputed zscore dataframe
        Raises
            ValueError: if ``self.imputation_style`` is neither "online" nor
                "batch"
        """
        # Fail early with a clear message instead of hitting an undefined
        # imputation function once the first region is processed.
        if self.imputation_style not in self._IMPUTATION_STYLES:
            raise ValueError(
                "imputation_style must be one of {0}, got {1!r}".format(
                    self._IMPUTATION_STYLES, self.imputation_style))

        # Each region is imputed on top of the result of the previous one.
        for ld_file in self._ld_region_files(chrom, ld_folder):
            print("processing Region: {0}".format(ld_file))
            if self.imputation_style == "online":
                zscore = ld_region_centered_window_imputation(
                    ld_file, ref_panel, zscore, self.window_size)
            else:
                zscore = impg_like_imputation(
                    ld_file, ref_panel, zscore, self.window_size,
                    self.buffer, self.lamb, self.rcond)

        return realigned_zfiles_on_panel(ref_panel, zscore)

    def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
        """
        Launch imputation on all chromosome

        For each chromosome number ``i`` from 1 to 22, read the reference
        panel and the known z-scores, impute the chromosome and write the
        result as a tab-separated file.

        Args:
            gwas_tag (str): tag used in the z-score file names
                (``z_<gwas_tag>_chr<i>.txt``)
            ref_panel_folder (str): folder holding the reference panel files
                ``chr<i>.eur.1pct.bim``
            ld_folder (str): folder holding the ``.ld`` files
            zscore_folder (str): folder holding the known z-score files
            folder_output (str): folder where the imputed z-score files are
                written (same file names as the input ones)
        """
        for i in range(1, 23):
            chrom = "chr" + str(i)
            zscore_file_name = "/z_" + gwas_tag + "_" + chrom + ".txt"

            ref_panel_file = ref_panel_folder + "/" + chrom + ".eur.1pct.bim"
            ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)

            known_zscore = pd.read_csv(zscore_folder + zscore_file_name, index_col=0, sep="\t")

            z_imp = self.chromosome_imputation(chrom, known_zscore, ref_panel, ld_folder)

            z_imp.to_csv(folder_output + zscore_file_name, sep="\t")