Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
8b3491c6
Commit
8b3491c6
authored
Mar 06, 2018
by
Hanna JULIENNE
Browse files
wrote centered window function
parent
36cb79b4
Changes
2
Hide whitespace changes
Inline
Side-by-side
impute_jass/impute_jass/stat_models.py
View file @
8b3491c6
"""
function for SNP imputation
"""
import
numpy
as
np
...
...
impute_jass/impute_jass/windows.py
View file @
8b3491c6
...
...
@@ -5,7 +5,10 @@ implement the imputation window is sliding along the genome:
- centered_window: A sliding window centered on the Snp to impute
"""
from
.stat_models
import
ImpG_model_batch
,
ImpG_model_snp
from
.ld_matrix
import
generate_sparse_matrix
import
pandas
as
pd
def
parse_region_position
(
LD_file
):
"""
...
...
@@ -17,9 +20,60 @@ def parse_region_position(LD_file):
(
chrom
,
startpos
,
endpos
)
=
LD_file
.
split
(
"/"
)[
-
1
].
split
(
"."
)[
0
].
split
(
'_'
)
return
(
chrom
,
startpos
,
endpos
)
def
centered_window_imputation
(
LD_file
,
ref_panel_folder
,
Zfile
):
def realigned_zfiles_on_panel(ref_panel, Zscores):
    """
    Check if the counted allele is the same in the reference panel and
    the Zscore files.
    If not, the coded and other allele are inverted and the Zscores sign
    is inverted also.

    Arguments:
        ref_panel: DataFrame indexed by SNP id holding the 'Ref_all' and
            'alt_all' columns; every id of Zscores must be present in it
        Zscores: DataFrame indexed by SNP id holding the 'A0', 'A1' and 'Z'
            columns. It is modified in place.

    Return:
        The same Zscores object, where every SNP whose A0 differed from the
        panel 'Ref_all' now carries A0 = Ref_all, A1 = alt_all and -Z.
    """
    # Panel alleles restricted to (and ordered like) the Z-score SNPs, so the
    # comparison below is done between identically labelled Series.
    panel_alleles = ref_panel.loc[Zscores.index, ['Ref_all', 'alt_all']]
    allele_inverted = (panel_alleles['Ref_all'] != Zscores['A0'])

    # A SNP is considered aligned when A0 == Ref_all, so an inverted SNP must
    # end up with A0 = Ref_all / A1 = alt_all. Assigning them the other way
    # round would leave the SNP flagged as inverted and a second call would
    # flip the Z-score sign again.
    Zscores.loc[allele_inverted, "A0"] = panel_alleles.loc[allele_inverted, 'Ref_all']
    Zscores.loc[allele_inverted, "A1"] = panel_alleles.loc[allele_inverted, 'alt_all']
    Zscores.loc[allele_inverted, "Z"] = -Zscores.loc[allele_inverted, "Z"]
    return Zscores
def centered_window_imputation(LD_file, ref_panel_folder, Zfile, window_size):
    """
    Each missing Snp is imputed by known snp found in a window centered on
    the SNP to impute.

    Arguments:
        LD_file: path of the LD file of the region; the chromosome is taken
            from its name by parse_region_position
        ref_panel_folder: folder holding the "<chrom>.eur.1pct.bim" reference
            panel files. When empty or None the historical hard-coded
            location is used.
        Zfile: tab separated Z-score file whose first column is the SNP id;
            the 'A0', 'A1' and 'Z' columns are read
        window_size: half width of the window; a typed SNP is used when its
            panel position lies strictly between pos - window_size and
            pos + window_size of the SNP to impute

    Return:
        The Z-score DataFrame extended with one row per imputed SNP ('Z' is
        the imputed value, 'Var' the value reported by the model; typed SNPs
        have 'Var' = 1), with a 'pos' column taken from the reference panel
        and sorted by that column.
    """
    # Local import so that this function does not depend on the module header.
    import os

    chrom = parse_region_position(LD_file)[0]

    panel_dir = ref_panel_folder or "/mnt/atlas/PCMA/1._DATA/ImpG_refpanel"
    ref_panel_file = os.path.join(panel_dir, "{0}.eur.1pct.bim".format(chrom))
    print(ref_panel_file)
    ref_panel = pd.read_csv(
        ref_panel_file,
        sep="\t",
        names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'],
        index_col=1,
    )
    LD_mat = generate_sparse_matrix(LD_file, ref_panel)

    Zscores = pd.read_csv(Zfile, index_col=0, sep="\t")
    Zscores = realigned_zfiles_on_panel(ref_panel, Zscores)
    Zscores['Var'] = 1

    # dispatch snp between typed and untyped
    unknowns = LD_mat.index.difference(Zscores.index)

    # Positions of the typed SNPs, read from the panel so that they share the
    # coordinate system of the window boundaries. Only typed SNPs present in
    # the LD matrix can be used, otherwise the LD look-ups below would fail.
    # Computed once, before any imputed row is appended to Zscores, so an
    # imputed SNP is never used as a predictor for another one.
    typed_pos = ref_panel.loc[Zscores.index, 'pos']
    typed_pos = typed_pos[typed_pos.index.isin(LD_mat.index)]

    print("### Imputation of {0} snps ###".format(len(unknowns)))

    for snp_unknown in unknowns:
        # Boundary of the centered_window
        center = ref_panel.loc[snp_unknown, 'pos']
        start_ld_block = center - window_size
        end_ld_block = center + window_size

        in_window = (start_ld_block < typed_pos) & (typed_pos < end_ld_block)
        known = typed_pos[in_window].index

        # NOTE(review): `known` may be empty when no typed SNP falls in the
        # window; the behaviour of ImpG_model_snp in that case is not visible
        # from here -- confirm.
        Sig_t = LD_mat.loc[known, known]
        Sig_i_t = LD_mat.loc[snp_unknown, known]
        Zt = Zscores.loc[known, 'Z']
        imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t)

        # Results go to the full table (not to the Zt slice, which is a
        # one-column Series of the predictors).
        Zscores.loc[snp_unknown, "Z"] = imp['mu']
        Zscores.loc[snp_unknown, "Var"] = imp['Var']

    # Give every row (typed and imputed) its panel position so that the
    # result can be sorted along the genome.
    Zscores['pos'] = ref_panel.loc[Zscores.index, 'pos']
    return Zscores.sort_values(by="pos")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment