Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Statistical-Genetics
RAISS
Commits
c79adc67
Commit
c79adc67
authored
Mar 07, 2018
by
Hanna JULIENNE
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed centered window imputation
parent
8b3491c6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
15 deletions
+40
-15
impute_jass/impute_jass/windows.py
impute_jass/impute_jass/windows.py
+40
-15
No files found.
impute_jass/impute_jass/windows.py
View file @
c79adc67
...
...
@@ -9,6 +9,8 @@ from .stat_models import ImpG_model_batch, ImpG_model_snp
from
.ld_matrix
import
generate_sparse_matrix
import
pandas
as
pd
import
numpy
as
np
def
parse_region_position
(
LD_file
):
"""
...
...
@@ -37,43 +39,66 @@ def realigned_zfiles_on_panel(ref_panel, Zscores):
return
Zscores
def
prepare_Zscore_for_imputation
(
ref_panel
,
Zscores
):
"""
Prepare the known Z score by realigning them on the reference ref_panel
the snps that are not present in the ref panel are filtered
"""
Zscores
=
realigned_zfiles_on_panel
(
ref_panel
,
Zscores
)
Zscores
[
'Var'
]
=
1
Zscores
[
'Nsnp_to_impute'
]
=
-
1
Zscores
=
Zscores
.
loc
[
Zscores
.
index
.
intersection
(
ref_panel
.
index
)]
return
Zscores
def
in_region
(
pos_vector
,
start
,
end
):
return
((
start
<
pos_vector
)
&
(
pos_vector
<
end
))
def
centered_window_imputation
(
LD_file
,
ref_panel_folder
,
Zfile
,
window_size
):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument
"""
(
chrom
,
start
pos
,
endpos
)
=
parse_region_position
(
LD_file
)
(
chrom
,
start
_ld_block
,
end_ld_block
)
=
parse_region_position
(
LD_file
)
ref_panel_file
=
"/mnt/atlas/PCMA/1._DATA/ImpG_refpanel/{0}.eur.1pct.bim"
.
format
(
chrom
)
print
(
ref_panel_file
)
ref_panel
=
pd
.
read_csv
(
ref_panel_file
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"nothing"
,
'pos'
,
'Ref_all'
,
'alt_all'
],
index_col
=
1
)
LD_mat
=
generate_sparse_matrix
(
LD_file
,
ref_panel
)
Zscores
=
pd
.
read_csv
(
Zfile
,
index_col
=
0
,
sep
=
"
\t
"
)
Zscores
=
prepare_Zscore_for_imputation
(
ref_panel
,
Zscores
)
Zscores
=
realigned_zfiles_on_panel
(
ref_panel
,
Zscores
)
Zscores
[
'Var'
]
=
1
# dispatch snp between typed and untyped
unknowns
=
LD_mat
.
index
.
difference
(
Df
.
index
)
# Find Snp to impute
unknowns
=
LD_mat
.
index
.
difference
(
Zscores
.
index
)
N_snp
=
len
(
unknowns
)
print
(
"### Imputation of {0} snps ###"
.
format
(
len
(
unknowns
)))
i
=
0
for
snp_unknown
in
unknowns
:
# Boundary of the centered_window
start_ld_block
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
-
window
end_ld_block
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
+
window
known
=
Zscores
.
loc
[(
start_ld_block
<
Df
.
pos
)
&
(
Df
.
pos
<
end_ld_block
)].
index
start_pos
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
-
window_size
end_pos
=
ref_panel
.
loc
[
snp_unknown
,
'pos'
]
+
window_size
#print(snp_unknown, start_pos, end_pos, start_ld_block, end_ld_block)
in_LD_reg_n_window
=
in_region
(
Zscores
.
pos
,
int
(
start_ld_block
),
int
(
end_ld_block
))
&
in_region
(
Zscores
.
pos
,
start_pos
,
end_pos
)
known
=
Zscores
.
loc
[
in_LD_reg_n_window
].
index
Sig_t
=
LD_mat
.
loc
[
known
,
known
]
Sig_i_t
=
LD_mat
.
loc
[
snp_unknown
,
known
]
Zt
=
Zscores
.
loc
[
known
,
'Z'
]
imp
=
ImpG_model_snp
(
Zt
,
Sig_t
,
Sig_i_t
)
Zt
.
loc
[
snp_unknown
,
"Z"
]
=
imp
[
'mu'
]
Zt
.
loc
[
snp_unknown
,
"Var"
]
=
imp
[
'Var'
]
if
(
len
(
known
)
>
0
):
imp
=
ImpG_model_snp
(
Zt
,
Sig_t
,
Sig_i_t
)
Zscores
.
loc
[
snp_unknown
,
"Z"
]
=
imp
[
'mu'
]
Zscores
.
loc
[
snp_unknown
,
"Var"
]
=
imp
[
'Var'
]
Zscores
[
snp_unknown
,
'Nsnp_to_impute'
]
=
len
(
known
)
i
=
i
+
1
if
i
%
10
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
N_snp
,
4
)))
return
Zt
.
sort_values
(
by
=
"pos"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment