Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
8c2374aa
Commit
8c2374aa
authored
Mar 30, 2018
by
Hanna JULIENNE
Browse files
refactoring windows.py a little
parent
12959804
Changes
3
Hide whitespace changes
Inline
Side-by-side
impute_jass/impute_jass/__main__.py
View file @
8c2374aa
...
...
@@ -8,17 +8,21 @@ def launch_chromosome_imputation(args):
Function whose only purpose is to allow the calling of the ImputationLauncher.chromosome_imputation method
from an entry point
"""
print
(
args
)
print
(
"Imputation of {0} gwas for chromosome {1}"
.
format
(
args
.
gwas
,
args
.
chrom
)
)
imputer
=
ImputationLauncher
(
window_size
=
int
(
args
.
window_size
),
buf
=
int
(
args
.
buffer_size
),
lamb
=
float
(
args
.
l2_regularization
),
pinv_rcond
=
float
(
args
.
eigen_treshold
))
z_file
=
"{0}/z_{1}_{2}.txt"
.
format
(
args
.
zscore_folder
,
args
.
gwas
,
args
.
chrom
)
zscore
=
pd
.
read_csv
(
z_file
)
zscore
=
pd
.
read_csv
(
z_file
,
index_col
=
0
,
sep
=
"
\t
"
)
ref_panel_file
=
args
.
ref_folder
+
"/"
+
args
.
chrom
+
".eur.1pct.bim"
ref_panel
=
pd
.
read_csv
(
ref_panel_file
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"nothing"
,
'pos'
,
'Ref_all'
,
'alt_all'
],
index_col
=
1
)
imputer
.
chromosome_imputation
(
args
.
chrom
,
zscore
,
ref_panel
,
args
.
ld_folder
)
imputed_zscore
=
imputer
.
chromosome_imputation
(
args
.
chrom
,
zscore
,
ref_panel
,
args
.
ld_folder
)
print
(
"Imputation DONE"
)
z_fo
=
"{0}/z_{1}_{2}.txt"
.
format
(
args
.
output_folder
,
args
.
gwas
,
args
.
chrom
)
imputed_zscore
.
to_csv
(
z_fo
,
sep
=
'
\t
'
)
print
(
"Save imputation done "
)
def
add_chromosome_imputation_argument
(
parser
):
parser
.
add_argument
(
'--chrom'
,
required
=
True
,
help
=
"chromosome to impute to the chr\d+ format"
)
...
...
impute_jass/impute_jass/stat_models.py
View file @
8c2374aa
...
...
@@ -3,6 +3,7 @@ function for SNP imputation
"""
import
numpy
as
np
import
scipy
as
sc
import
scipy.linalg
def
compute_mu
(
sig_i_t
,
sig_t_inv
,
zt
):
return
np
.
dot
(
sig_i_t
,
np
.
dot
(
sig_t_inv
,
zt
))
...
...
@@ -28,7 +29,7 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
"""
sig_t
=
sig_t
.
values
np
.
fill_diagonal
(
sig_t
,
(
1
+
lamb
))
sig_t_inv
=
sc
.
linalg
.
pinv
(
sig_t
,
rcond
=
rcond
)
sig_t_inv
=
sc
ipy
.
linalg
.
pinv
(
sig_t
,
rcond
=
rcond
)
if
batch
:
condition_number
=
np
.
array
([
np
.
linalg
.
cond
(
sig_t
)]
*
sig_i_t
.
shape
[
0
])
...
...
impute_jass/impute_jass/windows.py
View file @
8c2374aa
...
...
@@ -90,14 +90,12 @@ def ld_region_centered_window_imputation(ld_file, ref_panel, zscore, window_size
return
zscore
.
sort_values
(
by
=
"pos"
)
def
impg_like_imputation
(
ld_file
,
ref_panel
,
zscore
,
window_size
,
buffer
,
lamb
,
rcond
,
unknowns
=
pd
.
Series
([])):
def
compute_window_and_size
(
start_ld_block
,
end_ld_block
,
window_size
):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument
Compute the number of window to pave the ld_block tightly
"""
(
chrom
,
start_ld_block
,
end_ld_block
)
=
parse_region_position
(
ld_file
)
LD_mat
=
generate_sparse_matrix
(
ld_file
,
ref_panel
)
Nwindows
=
((
int
(
end_ld_block
))
-
(
int
(
start_ld_block
)))
//
window_size
# adapt window size to cover the LD block
if
Nwindows
>
0
:
...
...
@@ -105,9 +103,49 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
else
:
Nwindows
=
1
window_resize
=
np
.
ceil
(
int
(
end_ld_block
)
-
(
int
(
start_ld_block
)))
return
Nwindows
,
window_resize
def
format_result_df
(
imp
,
ref_panel
,
known
,
unknowns
):
result_dict
=
{
'pos'
:
ref_panel
.
loc
[
unknowns
,
'pos'
],
'A0'
:
ref_panel
.
loc
[
unknowns
,
"Ref_all"
],
"A1"
:
ref_panel
.
loc
[
unknowns
,
"alt_all"
],
"Z"
:
imp
[
'mu'
],
"Var"
:
imp
[
"var"
],
"ld_score"
:
imp
[
"ld_score"
],
"condition_number"
:
imp
[
'condition_number'
],
"correct_inversion"
:
imp
[
"correct_inversion"
],
"Nsnp_to_impute"
:
len
(
known
)
}
column_order
=
[
'pos'
,
'A0'
,
"A1"
,
"Z"
,
"Var"
,
"ld_score"
,
"condition_number"
,
"correct_inversion"
,
"Nsnp_to_impute"
]
batch_df
=
pd
.
DataFrame
(
result_dict
,
columns
=
column_order
)
return
batch_df
def
print_progression
(
i
,
Nwindows
):
"""
print the percentage of window treated in the ld block
"""
if
i
%
(
np
.
ceil
(
Nwindows
/
10
))
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
Nwindows
,
3
)))
def
impg_like_imputation
(
ld_file
,
ref_panel
,
zscore
,
window_size
,
buffer
,
lamb
,
rcond
,
unknowns
=
pd
.
Series
([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument
"""
(
chrom
,
start_ld_block
,
end_ld_block
)
=
parse_region_position
(
ld_file
)
LD_mat
=
generate_sparse_matrix
(
ld_file
,
ref_panel
)
Nwindows
,
window_resize
=
compute_window_and_size
(
start_ld_block
,
end_ld_block
,
window_size
)
all_unknowns
=
ref_panel
.
loc
[
ref_panel
.
index
.
difference
(
zscore
.
index
)]
#zscore = pd.read_csv(Zfile, index_col=0, sep="\t")
zscore
=
prepare_zscore_for_imputation
(
ref_panel
,
zscore
)
zscore_results
=
zscore
.
copy
(
deep
=
True
)
...
...
@@ -133,28 +171,15 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
if
(
len
(
known
)
>
0
):
imp
=
impg_model
(
zt
,
sig_t
,
sig_i_t
,
lamb
=
lamb
,
rcond
=
rcond
,
batch
=
True
)
batch_df
=
pd
.
DataFrame
({
'pos'
:
ref_panel
.
loc
[
unknowns
,
'pos'
],
'A0'
:
ref_panel
.
loc
[
unknowns
,
"Ref_all"
],
"A1"
:
ref_panel
.
loc
[
unknowns
,
"alt_all"
],
"ws_start"
:
start_windows
,
"ws_end"
:
end_windows
,
"Z"
:
imp
[
'mu'
],
"Var"
:
imp
[
"var"
],
"ld_score"
:
imp
[
"ld_score"
],
"condition_number"
:
imp
[
'condition_number'
],
"correct_inversion"
:
imp
[
"correct_inversion"
],
"Nsnp_to_impute"
:
len
(
known
)
})
# keep only snp in the core window
start_windows
=
int
(
start_ld_block
)
+
i
*
window_resize
end_windows
=
int
(
start_ld_block
)
+
(
i
+
1
)
*
window_resize
in_core_window
=
in_region
(
batch_df
.
pos
,
start_windows
,
end_windows
)
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df
.
loc
[
in_core_window
]])
batch_df
=
format_result_df
(
imp
,
ref_panel
,
known
,
unknowns
):
# keep only snp in the core window (and not in the buffer)
start_core_window
=
int
(
start_ld_block
)
+
i
*
window_resize
end_core_window
=
int
(
start_ld_block
)
+
(
i
+
1
)
*
window_resize
in_core_window
=
in_region
(
batch_df
.
pos
,
start_core_window
,
end_core_window
)
zscore_results
=
pd
.
concat
([
zscore_results
,
batch_df
.
loc
[
in_core_window
]])
i
=
i
+
1
if
i
%
10
==
0
:
print
(
"{0}\%"
.
format
(
np
.
round
(
i
/
Nwindows
,
4
)))
print_progression
(
i
,
Nwindows
)
return
zscore_results
.
sort_values
(
by
=
"pos"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment