Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
3d3d5eef
Commit
3d3d5eef
authored
Mar 08, 2018
by
Hanna JULIENNE
Browse files
Fixed centered window imputation + stat models
parent
63689893
Changes
2
Hide whitespace changes
Inline
Side-by-side
impute_jass/impute_jass/stat_models.py
View file @
3d3d5eef
...
...
@@ -3,7 +3,7 @@ function for SNP imputation
"""
import
numpy
as
np
def
ImpG_model_batch
(
Zt
,
Sig_t
,
Sig_i_t
):
def
ImpG_model_batch
(
Zt
,
Sig_t
,
Sig_i_t
,
lamb
=
0.01
):
"""
Argument:
Zt : (vector) the vector of known Z scores
...
...
@@ -11,8 +11,9 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t):
#np.fill_diagonal(Sig_t.values, 1.01)
#Sig_t.fillna(0, inplace=True)
Sig_t_inv
=
np
.
linalg
.
inv
(
Sig_t
)
Sig_t
=
Sig_t
.
values
np
.
fill_diagonal
(
Sig_t
,
(
1
+
lamb
))
Sig_t_inv
=
np
.
linalg
.
pinv
(
Sig_t
)
Var
=
np
.
diag
(
Sig_t
)[
0
]
-
np
.
einsum
(
'ij,jk,ki->i'
,
Sig_i_t
,
Sig_t_inv
,
Sig_i_t
.
transpose
())
...
...
@@ -21,16 +22,21 @@ def ImpG_model_batch(Zt, Sig_t, Sig_i_t):
return
({
"Var"
:
Var
,
"mu"
:
mu
})
def
ImpG_model_snp
(
Zt
,
Sig_t
,
Sig_i_t
):
def
ImpG_model_snp
(
Zt
,
Sig_t
,
Sig_i_t
,
lamb
=
0.01
):
"""
Argument:
Zt : (vector) the vector of known Z scores
"""
#np.fill_diagonal(Sig_t.values, 1.01)
#Sig_t.fillna(0, inplace=True)
Sig_t_inv
=
np
.
linalg
.
inv
(
Sig_t
)
Sig_t
=
Sig_t
.
values
np
.
fill_diagonal
(
Sig_t
,
(
1
+
lamb
))
#I = np.identity(Sig_t.shape[0])
#Sig_t_inv =np.linalg.inv(Sig_t)
Sig_t_inv
=
np
.
linalg
.
pinv
(
Sig_t
)
Var
=
np
.
diag
(
Sig_t
)[
0
]
-
np
.
dot
(
Sig_i_t
,
np
.
dot
(
Sig_t_inv
,
Sig_i_t
.
transpose
()))
if
Var
<
0
:
Var
=
0
#np.einsum('ij,jk,ki->i', Sig_i_t, Sig_t_inv ,Sig_i_t.transpose())
mu
=
np
.
dot
(
Sig_i_t
,
np
.
dot
(
Sig_t_inv
,
Zt
))
...
...
impute_jass/impute_jass/windows.py
View file @
3d3d5eef
...
...
@@ -54,7 +54,7 @@ def prepare_Zscore_for_imputation(ref_panel, Zscores):
def in_region(pos_vector, start, end):
    """
    Flag the positions lying strictly between two bounds.

    Argument:
        pos_vector : positions to test; the caller visible in this file
            passes ``Zscores.pos`` (presumably a pandas Series -- confirm
            against the rest of the module)
        start : lower bound, excluded because the comparison is strict
        end : upper bound, excluded because the comparison is strict

    Return:
        the result of ``(start < pos_vector) & (pos_vector < end)``; for an
        array-like input this is an element-wise boolean mask
    """
    # Operand order is kept as ``start < pos_vector`` so comparison dispatch
    # on the operands is the same as in the single-expression form.
    after_start = start < pos_vector
    before_end = pos_vector < end
    return after_start & before_end
def
centered_window_imputation
(
LD_file
,
ref_panel_folder
,
Zscores
,
window_size
,
unknowns
=
pd
.
Series
([])):
def
Ld_region_
centered_window_imputation
(
LD_file
,
ref_panel_folder
,
Zscores
,
window_size
,
unknowns
=
pd
.
Series
([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument
...
...
@@ -80,11 +80,11 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size,
for
snp_unknown
in
unknowns
:
# Boundary of the centered_window
start_pos
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
-
window_size
end_pos
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
+
window_size
start_pos
=
max
((
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
-
window_size
),
start_ld_block
)
end_pos
=
min
(
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
+
window_size
,
end_ld_block
)
#print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block)
in_LD_reg_n_window
=
in_region
(
Zscores
.
pos
,
int
(
start_ld_block
),
int
(
end_ld_block
))
&
in_region
(
Zscores
.
pos
,
start_pos
,
end_pos
)
in_LD_reg_n_window
=
in_region
(
Zscores
.
pos
,
start_pos
,
end_pos
)
known
=
Zscores
.
loc
[
in_LD_reg_n_window
].
index
Sig_t
=
LD_mat
.
loc
[
known
,
known
]
...
...
@@ -93,14 +93,13 @@ def centered_window_imputation(LD_file, ref_panel_folder, Zscores, window_size,
if
(
len
(
known
)
>
0
):
imp
=
ImpG_model_snp
(
Zt
,
Sig_t
,
Sig_i_t
)
Zscores
.
loc
[
snp_unknown
,
[
"pos"
,
"A0"
,
"A1"
]]
=
ref_panel
.
loc
[
snp_unknown
,
[
'pos'
,
"Ref_all"
,
"alt_all"
]]
Zscores
.
loc
[
snp_unknown
,
"pos"
]
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
Zscores
.
loc
[
snp_unknown
,
"A0"
]
=
ref_panel
.
loc
[
snp_unknown
,
"Ref_all"
]
Zscores
.
loc
[
snp_unknown
,
"A1"
]
=
ref_panel
.
loc
[
snp_unknown
,
"alt_all"
]
Zscores
.
loc
[
snp_unknown
,
"Z"
]
=
imp
[
'mu'
]
Zscores
.
loc
[
snp_unknown
,
"Var"
]
=
imp
[
'Var'
]
Zscores
.
loc
[
snp_unknown
,
'Nsnp_to_impute'
]
=
len
(
known
)
i
=
i
+
1
if
i
%
100
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
N_snp
,
4
)))
return
Zscores
.
sort_values
(
by
=
"pos"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment