Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
2ec760e8
Commit
2ec760e8
authored
Apr 11, 2018
by
Hanna JULIENNE
Browse files
add filter on SNP variance
parent
b11049c0
Changes
1
Hide whitespace changes
Inline
Side-by-side
impute_jass/impute_jass/windows.py
View file @
2ec760e8
...
...
@@ -20,7 +20,6 @@ def parse_region_position(ld_file):
(
chrom
,
startpos
,
endpos
)
=
ld_file
.
split
(
"/"
)[
-
1
].
split
(
"."
)[
0
].
split
(
'_'
)
return
(
chrom
,
startpos
,
endpos
)
def
realigned_zfiles_on_panel
(
ref_panel
,
zscore
):
"""
Check if the counted allele is the same in the reference panel and
...
...
@@ -52,44 +51,6 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
def in_region(pos_vector, start, end):
    """
    Flag the positions lying strictly between two bounds.

    Both bounds are exclusive: a position equal to `start` or to `end` is
    not flagged. The two comparisons are combined with `&`, so the result
    is element-wise whenever `pos_vector` supports element-wise comparison.
    """
    after_start = start < pos_vector
    before_end = pos_vector < end
    return after_start & before_end
def ld_region_centered_window_imputation(ld_file, ref_panel, zscore, window_size, unknowns=None):
    """
    Impute each missing SNP from the known SNPs found in a window centered on it.

    Args:
        ld_file: path of the LD file. The region boundaries are extracted
            from it by `parse_region_position` and the LD matrix is built
            from it by `generate_sparse_matrix`.
        ref_panel: reference panel; the 'pos', 'Ref_all' and 'alt_all'
            entries of each SNP to impute are read with `.loc`.
        zscore: observed Z-scores; passed through
            `prepare_zscore_for_imputation` before use.
        window_size: distance added on each side of the position of the
            SNP to impute to delimit its window; the window is clipped to
            the boundaries of the LD region.
        unknowns: identifiers of the SNPs to impute. When None or empty,
            every SNP of the LD matrix index that is absent from the
            Z-score index is imputed.

    Returns:
        The prepared Z-score table, with one row added per imputed SNP,
        sorted by 'pos'.
    """
    _chrom, start_ld_block, end_ld_block = parse_region_position(ld_file)
    # Region bounds do not change during the loop: convert them once.
    region_start = float(start_ld_block)
    region_end = float(end_ld_block)

    LD_mat = generate_sparse_matrix(ld_file, ref_panel)
    zscore = prepare_zscore_for_imputation(ref_panel, zscore)

    # Find Snp to impute
    # len() is used rather than truthiness because the truth value of a
    # pandas Series / Index is ambiguous.
    if unknowns is None or len(unknowns) == 0:
        unknowns = LD_mat.index.difference(zscore.index)

    N_snp = len(unknowns)
    print("### Imputation of {0} snps ###".format(N_snp))

    for i, snp_unknown in enumerate(unknowns):
        # Boundary of the centered_window
        snp_pos = ref_panel.loc[snp_unknown, 'pos']
        start_pos = max(snp_pos - window_size, region_start)
        end_pos = min(snp_pos + window_size, region_end)

        # NOTE(review): `zscore` grows inside this loop, so SNPs imputed in
        # earlier iterations are treated as known here - confirm this is
        # intended.
        in_LD_reg_n_window = in_region(zscore.pos, start_pos, end_pos)
        known = zscore.loc[in_LD_reg_n_window].index

        if len(known) > 0:
            # Slice the LD matrix only when there is something to impute from.
            sig_t = LD_mat.loc[known, known]
            sig_i_t = LD_mat.loc[snp_unknown, known]
            zt = zscore.loc[known, 'Z']

            imp = impg_model(zt, sig_t, sig_i_t, batch=False)
            zscore.loc[snp_unknown] = [
                snp_pos,
                ref_panel.loc[snp_unknown, "Ref_all"],
                ref_panel.loc[snp_unknown, "alt_all"],
                imp['mu'],
                imp['var'],
                len(known),
            ]

        if i % 300 == 0:
            # Report an actual percentage (the previous message printed a
            # fraction followed by a literal backslash).
            print("{0}%".format(np.round(100 * i / N_snp, 2)))

    return zscore.sort_values(by="pos")
def
compute_window_and_size
(
start_ld_block
,
end_ld_block
,
window_size
):
"""
...
...
@@ -131,8 +92,6 @@ def print_progression(i, Nwindows):
if
i
%
(
np
.
ceil
(
Nwindows
/
10
))
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
Nwindows
,
3
)))
def
impg_like_imputation
(
ld_file
,
ref_panel
,
zscore
,
window_size
,
buffer
,
lamb
,
rcond
,
unknowns
=
pd
.
Series
([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
...
...
@@ -178,8 +137,50 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
end_core_window
=
int
(
start_ld_block
)
+
(
i
+
1
)
*
window_resize
in_core_window
=
in_region
(
batch_df
.
pos
,
start_core_window
,
end_core_window
)
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df
.
loc
[
in_core_window
,
zscore_results
.
columns
]])
# keep only SNP with non negligible explained variance
snp_well_predicted
=
batch_df
.
Var
<
0.5
batch_df_filt
=
batch_df_filt
.
loc
[
in_core_window
&
snp_well_predicted
,
zscore_results
.
columns
]
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df_filt
])
i
=
i
+
1
print_progression
(
i
,
Nwindows
)
return
zscore_results
.
sort_values
(
by
=
"pos"
)
def ld_region_centered_window_imputation(ld_file, ref_panel, zscore, window_size, unknowns=None):
    """
    Impute each missing SNP from the known SNPs found in a window centered on it.

    Args:
        ld_file: path of the LD file. The region boundaries are extracted
            from it by `parse_region_position` and the LD matrix is built
            from it by `generate_sparse_matrix`.
        ref_panel: reference panel; the 'pos', 'Ref_all' and 'alt_all'
            entries of each SNP to impute are read with `.loc`.
        zscore: observed Z-scores; passed through
            `prepare_zscore_for_imputation` before use.
        window_size: distance added on each side of the position of the
            SNP to impute to delimit its window; the window is clipped to
            the boundaries of the LD region.
        unknowns: identifiers of the SNPs to impute. When None or empty,
            every SNP of the LD matrix index that is absent from the
            Z-score index is imputed.

    Returns:
        The prepared Z-score table, with one row added per imputed SNP,
        sorted by 'pos'.
    """
    _chrom, start_ld_block, end_ld_block = parse_region_position(ld_file)
    # Region bounds do not change during the loop: convert them once.
    region_start = float(start_ld_block)
    region_end = float(end_ld_block)

    LD_mat = generate_sparse_matrix(ld_file, ref_panel)
    zscore = prepare_zscore_for_imputation(ref_panel, zscore)

    # Find Snp to impute
    # len() is used rather than truthiness because the truth value of a
    # pandas Series / Index is ambiguous.
    if unknowns is None or len(unknowns) == 0:
        unknowns = LD_mat.index.difference(zscore.index)

    N_snp = len(unknowns)
    print("### Imputation of {0} snps ###".format(N_snp))

    for i, snp_unknown in enumerate(unknowns):
        # Boundary of the centered_window
        snp_pos = ref_panel.loc[snp_unknown, 'pos']
        start_pos = max(snp_pos - window_size, region_start)
        end_pos = min(snp_pos + window_size, region_end)

        # NOTE(review): `zscore` grows inside this loop, so SNPs imputed in
        # earlier iterations are treated as known here - confirm this is
        # intended.
        in_LD_reg_n_window = in_region(zscore.pos, start_pos, end_pos)
        known = zscore.loc[in_LD_reg_n_window].index

        if len(known) > 0:
            # Slice the LD matrix only when there is something to impute from.
            sig_t = LD_mat.loc[known, known]
            sig_i_t = LD_mat.loc[snp_unknown, known]
            zt = zscore.loc[known, 'Z']

            imp = impg_model(zt, sig_t, sig_i_t, batch=False)
            zscore.loc[snp_unknown] = [
                snp_pos,
                ref_panel.loc[snp_unknown, "Ref_all"],
                ref_panel.loc[snp_unknown, "alt_all"],
                imp['mu'],
                imp['var'],
                len(known),
            ]

        if i % 300 == 0:
            # Report an actual percentage (the previous message printed a
            # fraction followed by a literal backslash).
            print("{0}%".format(np.round(100 * i / N_snp, 2)))

    return zscore.sort_values(by="pos")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment