Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
hgcovid_imputation
Commits
11560560
Commit
11560560
authored
Mar 23, 2022
by
Hanna JULIENNE
Browse files
results from simulation
parent
f76f8cd3
Changes
7
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
11560560
...
...
@@ -45,6 +45,7 @@ Project Organization
│ │
│ ├── features
│ │ └── add_hg_37_pos.py : perform liftover to hg37 (as GnomAD is in hg37)
| | |__ retrieve_filled_out.py : Retrieve European studies with more than 80% of SNPs filled out
│ │
│ ├── models
│ │ │
...
...
@@ -55,9 +56,7 @@ Project Organization
impute Z-scores using simulated data : mask 50 SNPs out of
200, re-impute, then save results assuming different sample sizes (based on the hgcovid consortium or always 100)
|__ Impution_real_data.py
|__ Imputation_test_real_data.py : mask 10% of SNPs around significant loci and impute them back
│ │
│ └── visualization <- Scripts to create exploratory and results oriented visualizations
│ └── Draw_LD.R : Draw the LD matrix used to simulate Data
...
...
@@ -65,6 +64,7 @@ Project Organization
| (intrinsic variability due to sample size)
| |__ Draw_signal_variability.R : draw imputed signal
| ( variability due to sample size + imputation error)
|_ Imputation_strategy_real_data.R : Compare meta_analysis based on original Z-scores vs Imputed Z-scores
│
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
...
...
reports/figures/meta_analysis_perf.png
View replaced file @
f76f8cd3
View file @
11560560
29.8 KB
|
W:
|
H:
25.5 KB
|
W:
|
H:
2-up
Swipe
Onion skin
reports/tables/Meta_analysis_imputation_performance.csv
View file @
11560560
"","correlation","Max_L1_error","Mean_L1_error"
"meta_Z_imputed",0.995683217500374,1.22529552037174,0.0562121582752216
"meta_Z_imputed_R2weight",0.990440000978169,1.59426643449452,0.454110616671281
"meta_Z_imputed_high_samp",0.989021574001197,3.62404793904812,1.07203953928316
"meta_Z_imputed_valid",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_R2weight",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_high_samp",0.995289648693123,3.36583792058164,1.0635503576084
"meta_Z_imputed",0.997715664577698,2.10996809407546,0.0675865516980447
"meta_Z_imputed_R2weight",0.995827954783326,5.18573176225036,0.430368355995472
"meta_Z_imputed_high_samp",0.995382502246784,14.0988026398761,1.05536810725203
"meta_Z_imputed_valid",0.998172604572373,2.10996809407546,0.0660996762793101
"meta_Z_imputed_valid_R2weight",0.998172604572373,2.10996809407546,0.0660996762793099
"meta_Z_imputed_valid_high_samp",0.997893748220566,14.0588918701583,1.09929561392649
reports/tables/random_masking_real_data_error.csv
View file @
11560560
...
...
@@ -356,3 +356,37 @@
"355","BoSCO_EUR_Z",0.985476354338373,0.0956705476111556,0.807414380574432,26
"356","Amsterdam_UMC_COVID_study_group_EUR_Z",0.968965449672844,0.128762104108945,0.798285013504903,26
"357","meta_Z",0.976898019792069,0.113298175037217,1.0469342369506,26
"358","HOSTAGE_EUR_Z",0.991328644870294,0.141343612759392,0.160127095950146,27
"359","GHS_Freeze_145_EUR_Z",0.99766767575791,0.0749037098892356,0.228020291817981,27
"360","UKBB_EUR_Z",0.997094759585677,0.0515461004095718,0.160799166711033,27
"361","GENCOVID_EUR_Z",0.909612515827131,0.136975421410257,0.435332338970176,27
"362","BelCovid_EUR_Z",0.996923577061899,0.0584577391022102,0.0744064871095851,27
"363","23ANDME_EUR_Z",0.998858805300257,0.0639462153774435,0.100111024149858,27
"364","SweCovid_EUR_Z",0.984107078760595,0.122312852774997,0.288453118204419,27
"365","BQC19_EUR_Z",0.991219808057431,0.0947491992326761,0.351113880427233,27
"366","idipaz24genetics_EUR_Z",0.978086957636132,0.149899608436772,0.441550907259962,27
"367","ANCESTRY_Freeze_Four_EUR_Z",0.986168329804986,0.0925800491471221,0.316496190672388,27
"368","EstBB_EUR_Z",0.987555385124586,0.0286432287004613,0.0394833486854393,27
"369","Generation_Scotland_EUR_Z",0.984925142802746,0.0716150287562398,0.142388724433706,27
"370","DECODE_EUR_Z",0.999718201250732,0.0512618437741581,0.0795751464786553,27
"371","MVP_EUR_Z",0.999129826194626,0.0443966500674167,0.130271603592105,27
"372","BoSCO_EUR_Z",0.998222783557483,0.0359005277769725,0.0521612337125028,27
"373","Amsterdam_UMC_COVID_study_group_EUR_Z",0.911884325479305,0.0970165938865812,0.21003961291722,27
"374","meta_Z",0.996714601848827,0.054356164774863,0.116558759040066,27
"375","HOSTAGE_EUR_Z",0.996746629790268,0.103698167140976,0.193617866467096,28
"376","GHS_Freeze_145_EUR_Z",0.986706077331538,0.192115033844255,0.419428737445626,28
"377","UKBB_EUR_Z",0.999728018960211,0.0208145007407018,0.0297093426862298,28
"378","GENCOVID_EUR_Z",0.999505981199744,0.0673015356139549,0.108717238573582,28
"379","BelCovid_EUR_Z",0.999706986330761,0.0776637753057837,0.117534253598823,28
"380","23ANDME_EUR_Z",0.996021892955152,0.103335926547659,0.275330429767461,28
"381","SweCovid_EUR_Z",0.997928662876803,0.0362859206230599,0.104399152792451,28
"382","BQC19_EUR_Z",0.996417131989554,0.0504240096598093,0.106532510727776,28
"383","idipaz24genetics_EUR_Z",0.991619880579089,0.064726198305787,0.159112901032828,28
"384","ANCESTRY_Freeze_Four_EUR_Z",0.984404222546479,0.0682552263548015,0.101730447077309,28
"385","EstBB_EUR_Z",0.999916106361675,0.0159565059447764,0.029138947868637,28
"386","Generation_Scotland_EUR_Z",0.993150020263495,0.0692212245736399,0.166621314818111,28
"387","DECODE_EUR_Z",0.999905560185049,0.0384715385041048,0.0738413955579982,28
"388","MVP_EUR_Z",0.999939041634757,0.0395354830763295,0.0699736515063329,28
"389","BoSCO_EUR_Z",0.953628275945103,0.127214268108066,0.380480757814329,28
"390","Amsterdam_UMC_COVID_study_group_EUR_Z",0.98314126972152,0.186044808227531,0.385032944988245,28
"391","meta_Z",0.993455976919658,0.10198685402926,0.23546573006364,28
src/visualization/Draw_Imputation_quality.R
View file @
11560560
...
...
@@ -4,7 +4,8 @@ library(cowplot)
setwd
(
"/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization"
)
cohort
=
""
tag
=
"one_causal"
for
(
cohort
in
c
(
""
,
"_small_cohort"
)){
N_eff
=
fread
(
paste0
(
"../../data/external/meta_data/N_effective"
,
cohort
,
".csv"
))
N_eff
=
as.data.frame
(
N_eff
)
...
...
@@ -13,27 +14,36 @@ for(cohort in c("", "_small_cohort")){
for
(
tag
in
c
(
"null"
,
"one_causal"
,
'two_opposite'
,
'two_causal'
)){
imp_file
=
paste0
(
"../../data/processed/Simulated/Imputed/Imputed"
,
cohort
,
"_"
,
tag
,
".csv"
)
Zscores_file
=
paste0
(
"../../data/processed/Simulated/Zscores/Zscore"
,
cohort
,
"_"
,
tag
,
".csv"
)
Imputation
=
fread
(
imp_file
)
Zscores
=
fread
(
Zscores_file
)
Imputation
$
V1
=
as.character
(
Imputation
$
V1
)
Zscores
$
V1
=
as.character
(
Zscores
$
V1
)
setkey
(
Zscores
,
V1
)
setkey
(
Imputation
,
V1
)
ID
=
intersect
(
Zscores
$
V1
,
Imputation
$
V1
)
Zscores
=
Zscores
[
ID
,]
cor_ref
=
data.frame
(
correlation
=
cor
(
Imputation
[,
which
(
grepl
(
"Z_"
,
names
(
Imputation
))),
with
=
FALSE
])[,
1
])
cor_ref
=
data.frame
(
correlation
=
diag
(
cor
(
Imputation
[
ID
,
grep
(
"Z_"
,
names
(
Zscores
),
value
=
TRUE
),
with
=
FALSE
],
Zscores
[
Imputation
$
V1
,
grep
(
"Z_"
,
names
(
Zscores
),
value
=
TRUE
),
with
=
FALSE
])))
cor_ref
[
"N"
]
=
N_eff
[
row.names
(
cor_ref
),
"N_effective"
]
head
(
cor_ref
)
p_cor
=
ggplot
(
cor_ref
,
aes
(
x
=
N
,
y
=
correlation
))
+
geom_line
()
+
geom_point
()
+
theme_minimal
()
p_cor
=
ggplot
(
cor_ref
,
aes
(
x
=
N
,
y
=
correlation
))
+
geom_line
()
+
geom_point
()
+
theme_minimal
()
+
ylim
(
c
(
0
,
1
))
p_cor
beta_hat
=
melt
(
Imputation
[,
c
(
1
,
which
(
grepl
(
"Beta_"
,
names
(
Imputation
)))),
with
=
FALSE
],
id.vars
=
1
)
psig
=
ggplot
(
beta_hat
,
aes
(
x
=
V1
,
y
=
abs
(
value
),
group
=
variable
))
+
geom_line
(
alpha
=
0.2
)
+
geom_line
(
data
=
beta_hat
[
variable
==
"Beta_ref"
,],
color
=
"midnightblue"
,
lwd
=
1.1
)
psig
=
psig
+
theme_minimal
()
+
xlab
(
"snp"
)
+
ylab
(
"beta"
)
beta_hat_prec
=
melt
(
Imputation
[,
c
(
"V1"
,
"Beta_ref"
,
paste0
(
"Beta_"
,
N_eff
[
N_eff
$
N_effective
>
50
,
"study"
])),
with
=
FALSE
],
id.vars
=
1
)
beta_hat_prec
=
melt
(
Imputation
[,
c
(
"V1"
,
"Beta_ref"
,
paste0
(
"Beta_"
,
N_eff
[
N_eff
$
study
!=
""
,
"study"
])),
with
=
FALSE
],
id.vars
=
1
)
psig_prec
=
ggplot
(
beta_hat_prec
,
aes
(
x
=
V1
,
y
=
abs
(
value
),
group
=
variable
))
+
geom_line
(
alpha
=
0.2
)
+
geom_line
(
data
=
beta_hat_prec
[
variable
==
"Beta_ref"
,],
color
=
"midnightblue"
,
lwd
=
1.1
)
psig_prec
=
psig_prec
+
theme_minimal
()
+
xlab
(
"snp"
)
+
ylab
(
"beta"
)
Beta_scatter
=
melt
(
Imputation
[,
c
(
1
,
which
(
grepl
(
"Beta_"
,
names
(
Imputation
)))),
with
=
FALSE
],
id.vars
=
c
(
1
,
2
))
ggplot
(
Beta_scatter
,
aes
(
x
=
Beta_ref
,
y
=
value
))
+
geom_point
()
Beta_scatter_prec
=
melt
(
Imputation
[,
c
(
"V1"
,
"Beta_ref"
,
paste0
(
"Beta_"
,
N_eff
[
N_eff
$
N_effective
>
50
,
"study"
])),
with
=
FALSE
],
id.vars
=
c
(
1
,
2
))
Beta_scatter_prec
=
melt
(
Imputation
[,
c
(
"V1"
,
"Beta_ref"
,
paste0
(
"Beta_"
,
N_eff
[
N_eff
$
study
!=
""
,
"study"
])),
with
=
FALSE
],
id.vars
=
c
(
1
,
2
))
Beta_scatter_prec
=
ggplot
(
Beta_scatter_prec
,
aes
(
x
=
Beta_ref
,
y
=
value
,
color
=
variable
))
+
geom_point
()
+
scale_colour_hue
()
+
theme_minimal
()
+
theme
(
legend.pos
=
"none"
)
+
xlab
(
'Beta'
)
+
ylab
(
"imputed Beta"
)
panel
=
plot_grid
(
p_cor
,
psig_prec
,
Beta_scatter_prec
,
labels
=
c
(
"A"
,
"B"
,
"C"
),
nrow
=
1
)
...
...
src/visualization/Draw_loci_real_data.R
View file @
11560560
...
...
@@ -4,7 +4,7 @@ library(ggplot2)
setwd
(
"/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization/"
)
loci_id
=
2
masking_type
=
"
global
_masking"
masking_type
=
"
random
_masking"
sample_size
=
fread
(
'/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/external/meta_data/N_effective.csv'
)
correlation_list
=
list
()
...
...
src/visualization/Imputation_strategy_real_data.R
View file @
11560560
...
...
@@ -31,8 +31,6 @@ z_col_var =grep("_Z_Var_imputed$", names(loci_all), value=TRUE)
sapply
(
Z_cols
,
get_study
)
Z_score
=
loci_all
[,
Z_cols
,
with
=
FALSE
]
Z_score_var
=
loci_all
[,
z_col_var
,
with
=
FALSE
]
Z_score_imputed
=
loci_all
[,
Z_col_imputed
,
with
=
FALSE
]
...
...
@@ -44,7 +42,6 @@ compute_meta_analysis <- function(x, min_samp_size=0){
study
=
get_study
(
nm
)
if
((
!
is.na
(
x
[
nm
])
&
!
is.na
(
sample_size
[
study
,
N_effective
])))
{
if
((
sample_size
[
study
,
N_effective
>
min_samp_size
])){
meta_denominator
=
meta_denominator
+
x
[
nm
]
*
sample_size
[
study
,
N_effective
^
0.5
]
meta_numerator
=
meta_numerator
+
sample_size
[
study
,
N_effective
]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment