stat_models.py 1.88 KB
Newer Older
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
1
2
3
"""
function for SNP imputation
"""
hjulienne's avatar
hjulienne committed
4
import numpy as np
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
5
import scipy as sc
6
import scipy.linalg
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
7

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
8
9
def compute_mu(sig_i_t, sig_t_inv, zt):
    return np.dot(sig_i_t, np.dot(sig_t_inv, zt))
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
10

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
11
12
13
def compute_var(sig_i_t, sig_t_inv, lamb, batch=True):
    if batch:
        var = (1 + lamb) - np.einsum('ij,jk,ki->i', sig_i_t, sig_t_inv ,sig_i_t.transpose())
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
14
15
        ld_score = (sig_i_t**2).sum(1)

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
16
17
    else:
        var = (1 + lamb) - np.dot(sig_i_t, np.dot(sig_t_inv, sig_i_t.transpose()))
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
18
19
20
21
22
23
        ld_score = (sig_i_t**2).sum()

    return var, ld_score

def check_inversion(sig_t, sig_t_inv):
     return np.allclose(sig_t, np.dot(sig_t, np.dot(sig_t_inv, sig_t)))
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
24

25
26
27
28
29
30
31
32
33
34
35
36
37
def var_in_boundaries(var,lamb):
    """
    Forces the variance to be in the 0 to 1+lambda boundary
    theoritically we shouldn't have to do that
    """
    id_neg = np.where(var < 0)
    var_norm = var
    var[id_neg] = 0
    id_inf = np.where(var > (1+lamb))
    var[id_inf] = 1+lamb

    return var

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
38
def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01,  batch=True):
39
40
    """
    Argument:
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
41
        zt : (vector) the vector of known Z scores
42
    """
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
43
44
    sig_t = sig_t.values
    np.fill_diagonal(sig_t, (1+lamb))
45
    sig_t_inv = scipy.linalg.pinv(sig_t, rcond=rcond)
Hanna  JULIENNE's avatar
Hanna JULIENNE committed
46
47
48
49
50
51
52
53
54

    if batch:
        condition_number = np.array([np.linalg.cond(sig_t)]*sig_i_t.shape[0])
        correct_inversion = np.array([check_inversion(sig_t, sig_t_inv)]*sig_i_t.shape[0])
    else:
        condition_number = np.linalg.cond(sig_t)
        correct_inversion = check_inversion(sig_t, sig_t_inv)

    var, ld_score = compute_var(sig_i_t, sig_t_inv, lamb, batch)
55
56


Hanna  JULIENNE's avatar
Hanna JULIENNE committed
57
    mu = compute_mu(sig_i_t, sig_t_inv, zt)
58
59


Hanna  JULIENNE's avatar
Hanna JULIENNE committed
60
    if np.any(mu > 30):
61
62
63
64
        print("ABERANT SNP SNiP")
    var_norm = var_in_boundaries(var, lamb)
    mu = mu / np.sqrt(var_norm)

Hanna  JULIENNE's avatar
Hanna JULIENNE committed
65
    return({"var" : var, "mu" : mu, "ld_score" : ld_score, "condition_number" : condition_number, "correct_inversion":correct_inversion })