Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
98125ff6
Commit
98125ff6
authored
Feb 22, 2022
by
Hanna JULIENNE
Browse files
improved performance report
parent
8f919ee4
Changes
2
Hide whitespace changes
Inline
Side-by-side
raiss/imputation_R2.py
View file @
98125ff6
...
...
@@ -67,17 +67,26 @@ def imputation_performance(zscore_initial, zscore_imputed, masked):
masked : SNPs ids which have been masked by imputation
"""
try
:
N_masked
=
len
(
masked
)
masked
=
zscore_imputed
.
index
.
intersection
(
masked
)
fraction_imputed
=
1.0
-
zscore_imputed
.
loc
[
masked
,
"Z"
].
isnull
().
mean
()
N_imputed
=
len
(
masked
)
fraction_imputed
=
N_imputed
/
N_masked
#cor = zscore_initial.loc[masked, "Z"].corr(zscore_imputed.loc[masked, "Z"].fillna(0))
cor
=
zscore_initial
.
loc
[
masked
,
"Z"
].
corr
(
zscore_imputed
.
loc
[
masked
,
"Z"
])
MAE
=
(
zscore_initial
.
loc
[
masked
,
"Z"
]
-
zscore_imputed
.
loc
[
masked
,
"Z"
]).
dropna
().
abs
().
mean
()
return
{
'N_SNP'
:
len
(
masked
),
'fraction_imputed'
:
fraction_imputed
,
'cor'
:
cor
,
'mean_absolute_error'
:
MAE
}
except
KeyError
:
error_abs
=
(
zscore_initial
.
loc
[
masked
,
"Z"
]
-
zscore_imputed
.
loc
[
masked
,
"Z"
]).
dropna
().
abs
()
MAE
=
error_abs
.
mean
()
error_quantile
=
error_abs
.
quantile
([
0
,
0.5
,
1
])
SNP_max_error
=
error_abs
.
idxmax
()
return
{
'N_SNP'
:
len
(
masked
),
'fraction_imputed'
:
fraction_imputed
,
'cor'
:
cor
,
'mean_absolute_error'
:
MAE
,
'median_absolute_error'
:
error_quantile
.
loc
[
0.5
],
'min_absolute_error'
:
error_quantile
.
loc
[
0.0
],
'max_absolute_error'
:
error_quantile
.
loc
[
1.0
],
"SNP_max_error"
:
SNP_max_error
}
except
(
ValueError
,
KeyError
)
as
e
:
print
(
e
)
# If KeyError none of the masked_SNP are in the imputed dataframe
res
=
np
.
nan
return
{
'N_SNP'
:
np
.
nan
,
'fraction_imputed'
:
np
.
nan
,
'cor'
:
np
.
nan
,
'mean_absolute_error'
:
np
.
nan
}
return
{
'N_SNP'
:
np
.
nan
,
'fraction_imputed'
:
np
.
nan
,
'cor'
:
np
.
nan
,
'mean_absolute_error'
:
np
.
nan
,
'median_absolute_error'
:
np
.
nan
,
'min_absolute_error'
:
np
.
nan
,
'max_absolute_error'
:
np
.
nan
,
"SNP_max_error"
:
np
.
nan
}
def
z_amplitude_effect
(
zscore_folder
,
masked_folder
,
output_folder
,
ref_folder
,
ld_folder
,
gwas
,
ref_panel_preffix
=
""
,
ref_panel_suffix
=
".eur.1pct.bim"
,
...
...
@@ -192,7 +201,8 @@ def grid_search(zscore_folder, masked_folder, output_folder,
n_cpu
=
multiprocessing
.
cpu_count
()
Parallel
(
n_jobs
=
n_cpu
)(
delayed
(
run_imputation
)(
rd
)
for
rd
in
eigen_ratio_grid
)
R2_serie
=
pd
.
DataFrame
({
"cor"
:
np
.
nan
,
"mean_absolute_error"
:
np
.
nan
,
"fraction_imputed"
:
np
.
nan
},
index
=
eigen_ratio_grid
)
R2_serie
=
pd
.
DataFrame
({
'N_SNP'
:
np
.
nan
,
'fraction_imputed'
:
np
.
nan
,
'cor'
:
np
.
nan
,
'mean_absolute_error'
:
np
.
nan
,
'median_absolute_error'
:
np
.
nan
,
'min_absolute_error'
:
np
.
nan
,
'max_absolute_error'
:
np
.
nan
,
"SNP_max_error"
:
np
.
nan
},
index
=
eigen_ratio_grid
)
for
rd
in
eigen_ratio_grid
:
z_output
=
"{0}/z_{1}_{2}_{3}.txt"
.
format
(
output_folder
,
gwas
,
chrom
,
rd
)
...
...
@@ -204,10 +214,15 @@ def grid_search(zscore_folder, masked_folder, output_folder,
print
(
e
)
res
=
np
.
nan
R2_serie
.
loc
[
rd
,
'N_SNP'
]
=
res
[
"N_SNP"
]
R2_serie
.
loc
[
rd
,
'cor'
]
=
res
[
"cor"
]
R2_serie
.
loc
[
rd
,
'mean_absolute_error'
]
=
res
[
"mean_absolute_error"
]
R2_serie
.
loc
[
rd
,
'fraction_imputed'
]
=
res
[
"fraction_imputed"
]
R2_serie
.
loc
[
rd
,
'median_absolute_error'
]
=
res
[
"median_absolute_error"
]
R2_serie
.
loc
[
rd
,
'min_absolute_error'
]
=
res
[
"min_absolute_error"
]
R2_serie
.
loc
[
rd
,
'max_absolute_error'
]
=
res
[
"max_absolute_error"
]
R2_serie
.
loc
[
rd
,
'SNP_max_error'
]
=
res
[
"SNP_max_error"
]
print
(
len
(
masked_SNP
))
print
(
"Result for rd {0} = cor: {1}, fraction_imputed: {2}"
.
format
(
rd
,
res
[
"cor"
],
res
[
"fraction_imputed"
]
))
print
(
"Result for rd {0} = cor: {1}, fraction_imputed: {2}"
.
format
(
rd
,
res
[
"cor"
],
res
[
"fraction_imputed"
]))
return
(
R2_serie
)
raiss/stat_models.py
View file @
98125ff6
...
...
@@ -63,7 +63,7 @@ def var_in_boundaries(var,lamb):
id_neg
=
np
.
where
(
var
<
0
)
var_norm
=
var
var
[
id_neg
]
=
0
id_inf
=
np
.
where
(
var
>
(
1
+
lamb
))
id_inf
=
np
.
where
(
var
>
(
0.99999
+
lamb
))
var
[
id_inf
]
=
1
return
var
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment