Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
R
RAISS
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
1
Issues
1
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Statistical-Genetics
RAISS
Commits
2ec760e8
Commit
2ec760e8
authored
Apr 11, 2018
by
Hanna JULIENNE
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add filter on SNP variance
parent
b11049c0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
42 deletions
+43
-42
impute_jass/impute_jass/windows.py
impute_jass/impute_jass/windows.py
+43
-42
No files found.
impute_jass/impute_jass/windows.py
View file @
2ec760e8
...
...
@@ -20,7 +20,6 @@ def parse_region_position(ld_file):
(
chrom
,
startpos
,
endpos
)
=
ld_file
.
split
(
"/"
)[
-
1
].
split
(
"."
)[
0
].
split
(
'_'
)
return
(
chrom
,
startpos
,
endpos
)
def
realigned_zfiles_on_panel
(
ref_panel
,
zscore
):
"""
Check if the counted allele is the same in the reference panel and
...
...
@@ -52,44 +51,6 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
def in_region(pos_vector, start, end):
    """
    Flag the positions lying strictly between start and end.

    Argument
        pos_vector : positions to test; any object supporting elementwise
            ``<`` and ``&`` (e.g. a pandas Series or a numpy array)
        start : lower bound of the region (excluded)
        end : upper bound of the region (excluded)

    Return
        Elementwise boolean mask, True where start < position < end.
        Positions equal to start or to end are NOT in the region.
    """
    # Both comparisons are strict: the region is the open interval (start, end)
    return (start < pos_vector) & (pos_vector < end)
def ld_region_centered_window_imputation(ld_file, ref_panel, zscore, window_size, unknowns=pd.Series([])):
    """
    Each missing Snp is imputed by known snp found in a window centered on the SNP to impute

    Argument
        ld_file : path of the LD file; it is handed to parse_region_position
            to get the boundaries of the LD block and to
            generate_sparse_matrix to build the LD matrix
        ref_panel : table indexed by SNP id; the columns 'pos', 'Ref_all'
            and 'alt_all' are read here
        zscore : table of the known Z-scores; it goes through
            prepare_zscore_for_imputation before being used
        window_size : half width of the window, in the same unit as the
            'pos' column (the window is [pos - window_size, pos + window_size]
            clipped to the LD block)
        unknowns : ids of the SNPs to impute. When empty (default), every
            SNP of the LD matrix index that is absent from the zscore index
            is imputed. The default object is only read, never modified.

    Return
        The zscore table completed with one row per imputed SNP, sorted
        by 'pos'.
    """
    # Only the block boundaries are needed, the chromosome is ignored
    (_, start_ld_block, end_ld_block) = parse_region_position(ld_file)
    LD_mat = generate_sparse_matrix(ld_file, ref_panel)
    zscore = prepare_zscore_for_imputation(ref_panel, zscore)

    # Find Snp to impute
    if len(unknowns) == 0:
        unknowns = LD_mat.index.difference(zscore.index)

    N_snp = len(unknowns)
    print("### Imputation of {0} snps ###".format(N_snp))

    # The boundaries of the LD block do not change during the loop
    block_start = float(start_ld_block)
    block_end = float(end_ld_block)

    for i, snp_unknown in enumerate(unknowns):
        snp_pos = ref_panel.loc[snp_unknown, 'pos']

        # Boundary of the centered_window
        start_pos = max(snp_pos - window_size, block_start)
        end_pos = min(snp_pos + window_size, block_end)

        # NOTE(review): zscore receives the imputed rows below, so SNPs
        # imputed in earlier iterations can be selected as "known" here;
        # confirm that this is intended.
        in_LD_reg_n_window = in_region(zscore.pos, start_pos, end_pos)
        known = zscore.loc[in_LD_reg_n_window].index

        sig_t = LD_mat.loc[known, known]
        sig_i_t = LD_mat.loc[snp_unknown, known]
        zt = zscore.loc[known, 'Z']

        # A SNP without any known SNP in its window is left out of the result
        if len(known) > 0:
            imp = impg_model(zt, sig_t, sig_i_t, batch=False)
            # presumably matches the column order produced by
            # prepare_zscore_for_imputation -- TODO confirm
            zscore.loc[snp_unknown] = [
                snp_pos,
                ref_panel.loc[snp_unknown, "Ref_all"],
                ref_panel.loc[snp_unknown, "alt_all"],
                imp['mu'],
                imp['var'],
                len(known),
            ]

        if i % 300 == 0:
            # The backslash is escaped so the printed text is unchanged
            # ("<value>\%") without relying on an invalid escape sequence.
            # NOTE(review): the value is the fraction i / N_snp, not a
            # percentage.
            print("{0}\\%".format(np.round(i / N_snp, 4)))

    return zscore.sort_values(by="pos")
def
compute_window_and_size
(
start_ld_block
,
end_ld_block
,
window_size
):
"""
...
...
@@ -131,8 +92,6 @@ def print_progression(i, Nwindows):
if
i
%
(
np
.
ceil
(
Nwindows
/
10
))
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
Nwindows
,
3
)))
def
impg_like_imputation
(
ld_file
,
ref_panel
,
zscore
,
window_size
,
buffer
,
lamb
,
rcond
,
unknowns
=
pd
.
Series
([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
...
...
@@ -178,8 +137,50 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
end_core_window
=
int
(
start_ld_block
)
+
(
i
+
1
)
*
window_resize
in_core_window
=
in_region
(
batch_df
.
pos
,
start_core_window
,
end_core_window
)
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df
.
loc
[
in_core_window
,
zscore_results
.
columns
]])
# keep only SNP with non negligible explained variance
snp_well_predicted
=
batch_df
.
Var
<
0.5
batch_df_filt
=
batch_df_filt
.
loc
[
in_core_window
&
snp_well_predicted
,
zscore_results
.
columns
]
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df_filt
])
i
=
i
+
1
print_progression
(
i
,
Nwindows
)
return
zscore_results
.
sort_values
(
by
=
"pos"
)
def ld_region_centered_window_imputation(ld_file, ref_panel, zscore, window_size, unknowns=pd.Series([])):
    """
    Each missing Snp is imputed by known snp found in a window centered on the SNP to impute

    Argument
        ld_file : path of the LD file; it is handed to parse_region_position
            to get the boundaries of the LD block and to
            generate_sparse_matrix to build the LD matrix
        ref_panel : table indexed by SNP id; the columns 'pos', 'Ref_all'
            and 'alt_all' are read here
        zscore : table of the known Z-scores; it goes through
            prepare_zscore_for_imputation before being used
        window_size : half width of the window, in the same unit as the
            'pos' column (the window is [pos - window_size, pos + window_size]
            clipped to the LD block)
        unknowns : ids of the SNPs to impute. When empty (default), every
            SNP of the LD matrix index that is absent from the zscore index
            is imputed. The default object is only read, never modified.

    Return
        The zscore table completed with one row per imputed SNP, sorted
        by 'pos'.
    """
    # Only the block boundaries are needed, the chromosome is ignored
    (_, start_ld_block, end_ld_block) = parse_region_position(ld_file)
    LD_mat = generate_sparse_matrix(ld_file, ref_panel)
    zscore = prepare_zscore_for_imputation(ref_panel, zscore)

    # Find Snp to impute
    if len(unknowns) == 0:
        unknowns = LD_mat.index.difference(zscore.index)

    N_snp = len(unknowns)
    print("### Imputation of {0} snps ###".format(N_snp))

    # The boundaries of the LD block do not change during the loop
    block_start = float(start_ld_block)
    block_end = float(end_ld_block)

    for i, snp_unknown in enumerate(unknowns):
        snp_pos = ref_panel.loc[snp_unknown, 'pos']

        # Boundary of the centered_window
        start_pos = max(snp_pos - window_size, block_start)
        end_pos = min(snp_pos + window_size, block_end)

        # NOTE(review): zscore receives the imputed rows below, so SNPs
        # imputed in earlier iterations can be selected as "known" here;
        # confirm that this is intended.
        in_LD_reg_n_window = in_region(zscore.pos, start_pos, end_pos)
        known = zscore.loc[in_LD_reg_n_window].index

        sig_t = LD_mat.loc[known, known]
        sig_i_t = LD_mat.loc[snp_unknown, known]
        zt = zscore.loc[known, 'Z']

        # A SNP without any known SNP in its window is left out of the result
        if len(known) > 0:
            imp = impg_model(zt, sig_t, sig_i_t, batch=False)
            # presumably matches the column order produced by
            # prepare_zscore_for_imputation -- TODO confirm
            zscore.loc[snp_unknown] = [
                snp_pos,
                ref_panel.loc[snp_unknown, "Ref_all"],
                ref_panel.loc[snp_unknown, "alt_all"],
                imp['mu'],
                imp['var'],
                len(known),
            ]

        if i % 300 == 0:
            # The backslash is escaped so the printed text is unchanged
            # ("<value>\%") without relying on an invalid escape sequence.
            # NOTE(review): the value is the fraction i / N_snp, not a
            # percentage.
            print("{0}\\%".format(np.round(i / N_snp, 4)))

    return zscore.sort_values(by="pos")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment