Commit 11560560 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

results from simulation

parent f76f8cd3
......@@ -45,6 +45,7 @@ Project Organization
│ │
│   ├── features
│   │   └── add_hg_37_pos.py : perform liftover to hg37 (as gnomAD is in hg37)
| |  |__ retrieve_filled_out.py : Retrieve European studies with more than 80% of SNPs filled out
│ │
│   ├── models
│ │ │
......@@ -55,9 +56,7 @@ Project Organization
impute Z-scores using simulated data : mask 50 SNPs out of
200, re-impute them, then save the results assuming different sample sizes (based on the hgcovid consortium or always 100)
|__ Impution_real_data.py
|__ Imputation_test_real_data.py : mask 10% of SNPs around significant loci and impute them back
│ │
│   └── visualization <- Scripts to create exploratory and results oriented visualizations
│   └── Draw_LD.R : Draw the LD matrix used to simulate Data
......@@ -65,6 +64,7 @@ Project Organization
| (intrinsic variability due to sample size)
| |__ Draw_signal_variability.R : draw imputed signal
| ( variability due to sample size + imputation error)
|_ Imputation_strategy_real_data.R : Compare meta_analysis based on original Z-scores vs Imputed Z-scores
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
......
reports/figures/meta_analysis_perf.png

29.8 KB | W: | H:

reports/figures/meta_analysis_perf.png

25.5 KB | W: | H:

reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
  • 2-up
  • Swipe
  • Onion skin
"","correlation","Max_L1_error","Mean_L1_error"
"meta_Z_imputed",0.995683217500374,1.22529552037174,0.0562121582752216
"meta_Z_imputed_R2weight",0.990440000978169,1.59426643449452,0.454110616671281
"meta_Z_imputed_high_samp",0.989021574001197,3.62404793904812,1.07203953928316
"meta_Z_imputed_valid",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_R2weight",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_high_samp",0.995289648693123,3.36583792058164,1.0635503576084
"meta_Z_imputed",0.997715664577698,2.10996809407546,0.0675865516980447
"meta_Z_imputed_R2weight",0.995827954783326,5.18573176225036,0.430368355995472
"meta_Z_imputed_high_samp",0.995382502246784,14.0988026398761,1.05536810725203
"meta_Z_imputed_valid",0.998172604572373,2.10996809407546,0.0660996762793101
"meta_Z_imputed_valid_R2weight",0.998172604572373,2.10996809407546,0.0660996762793099
"meta_Z_imputed_valid_high_samp",0.997893748220566,14.0588918701583,1.09929561392649
......@@ -356,3 +356,37 @@
"355","BoSCO_EUR_Z",0.985476354338373,0.0956705476111556,0.807414380574432,26
"356","Amsterdam_UMC_COVID_study_group_EUR_Z",0.968965449672844,0.128762104108945,0.798285013504903,26
"357","meta_Z",0.976898019792069,0.113298175037217,1.0469342369506,26
"358","HOSTAGE_EUR_Z",0.991328644870294,0.141343612759392,0.160127095950146,27
"359","GHS_Freeze_145_EUR_Z",0.99766767575791,0.0749037098892356,0.228020291817981,27
"360","UKBB_EUR_Z",0.997094759585677,0.0515461004095718,0.160799166711033,27
"361","GENCOVID_EUR_Z",0.909612515827131,0.136975421410257,0.435332338970176,27
"362","BelCovid_EUR_Z",0.996923577061899,0.0584577391022102,0.0744064871095851,27
"363","23ANDME_EUR_Z",0.998858805300257,0.0639462153774435,0.100111024149858,27
"364","SweCovid_EUR_Z",0.984107078760595,0.122312852774997,0.288453118204419,27
"365","BQC19_EUR_Z",0.991219808057431,0.0947491992326761,0.351113880427233,27
"366","idipaz24genetics_EUR_Z",0.978086957636132,0.149899608436772,0.441550907259962,27
"367","ANCESTRY_Freeze_Four_EUR_Z",0.986168329804986,0.0925800491471221,0.316496190672388,27
"368","EstBB_EUR_Z",0.987555385124586,0.0286432287004613,0.0394833486854393,27
"369","Generation_Scotland_EUR_Z",0.984925142802746,0.0716150287562398,0.142388724433706,27
"370","DECODE_EUR_Z",0.999718201250732,0.0512618437741581,0.0795751464786553,27
"371","MVP_EUR_Z",0.999129826194626,0.0443966500674167,0.130271603592105,27
"372","BoSCO_EUR_Z",0.998222783557483,0.0359005277769725,0.0521612337125028,27
"373","Amsterdam_UMC_COVID_study_group_EUR_Z",0.911884325479305,0.0970165938865812,0.21003961291722,27
"374","meta_Z",0.996714601848827,0.054356164774863,0.116558759040066,27
"375","HOSTAGE_EUR_Z",0.996746629790268,0.103698167140976,0.193617866467096,28
"376","GHS_Freeze_145_EUR_Z",0.986706077331538,0.192115033844255,0.419428737445626,28
"377","UKBB_EUR_Z",0.999728018960211,0.0208145007407018,0.0297093426862298,28
"378","GENCOVID_EUR_Z",0.999505981199744,0.0673015356139549,0.108717238573582,28
"379","BelCovid_EUR_Z",0.999706986330761,0.0776637753057837,0.117534253598823,28
"380","23ANDME_EUR_Z",0.996021892955152,0.103335926547659,0.275330429767461,28
"381","SweCovid_EUR_Z",0.997928662876803,0.0362859206230599,0.104399152792451,28
"382","BQC19_EUR_Z",0.996417131989554,0.0504240096598093,0.106532510727776,28
"383","idipaz24genetics_EUR_Z",0.991619880579089,0.064726198305787,0.159112901032828,28
"384","ANCESTRY_Freeze_Four_EUR_Z",0.984404222546479,0.0682552263548015,0.101730447077309,28
"385","EstBB_EUR_Z",0.999916106361675,0.0159565059447764,0.029138947868637,28
"386","Generation_Scotland_EUR_Z",0.993150020263495,0.0692212245736399,0.166621314818111,28
"387","DECODE_EUR_Z",0.999905560185049,0.0384715385041048,0.0738413955579982,28
"388","MVP_EUR_Z",0.999939041634757,0.0395354830763295,0.0699736515063329,28
"389","BoSCO_EUR_Z",0.953628275945103,0.127214268108066,0.380480757814329,28
"390","Amsterdam_UMC_COVID_study_group_EUR_Z",0.98314126972152,0.186044808227531,0.385032944988245,28
"391","meta_Z",0.993455976919658,0.10198685402926,0.23546573006364,28
......@@ -4,7 +4,8 @@ library(cowplot)
setwd("/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization")
cohort=""
tag="one_causal"
for(cohort in c("", "_small_cohort")){
N_eff = fread(paste0("../../data/external/meta_data/N_effective",cohort,".csv"))
N_eff = as.data.frame(N_eff)
......@@ -13,27 +14,36 @@ for(cohort in c("", "_small_cohort")){
for( tag in c("null","one_causal", 'two_opposite', 'two_causal')){
imp_file = paste0("../../data/processed/Simulated/Imputed/Imputed",cohort,"_",tag,".csv")
Zscores_file = paste0("../../data/processed/Simulated/Zscores/Zscore",cohort,"_",tag,".csv")
Imputation = fread(imp_file)
Zscores = fread(Zscores_file)
Imputation$V1 = as.character(Imputation$V1)
Zscores$V1 = as.character(Zscores$V1)
setkey(Zscores, V1)
setkey(Imputation, V1)
ID = intersect(Zscores$V1, Imputation$V1)
Zscores = Zscores[ID,]
cor_ref = data.frame(correlation = cor(Imputation[,which(grepl("Z_",names(Imputation))), with=FALSE])[,1])
cor_ref = data.frame(correlation = diag(cor(Imputation[ID,grep("Z_",names(Zscores), value=TRUE), with=FALSE],
Zscores[Imputation$V1, grep("Z_",names(Zscores), value=TRUE),with=FALSE])))
cor_ref["N"] = N_eff[row.names(cor_ref), "N_effective"]
head(cor_ref)
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal()
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal() +ylim(c(0,1))
p_cor
beta_hat = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=1)
psig = ggplot(beta_hat, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig =psig + theme_minimal() + xlab("snp") + ylab("beta")
beta_hat_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 50,"study"])), with=FALSE], id.vars=1)
beta_hat_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$study!="","study"])), with=FALSE], id.vars=1)
psig_prec = ggplot(beta_hat_prec, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat_prec[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig_prec = psig_prec + theme_minimal() + xlab("snp") + ylab("beta")
Beta_scatter = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=c(1,2))
ggplot(Beta_scatter, aes(x=Beta_ref, y=value))+geom_point()
Beta_scatter_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 50,"study"])), with=FALSE], id.vars=c(1,2))
Beta_scatter_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$study!="","study"])), with=FALSE], id.vars=c(1,2))
Beta_scatter_prec = ggplot(Beta_scatter_prec, aes(x=Beta_ref, y=value, color=variable))+geom_point() + scale_colour_hue() + theme_minimal()+theme(legend.pos="none") + xlab('Beta') + ylab("imputed Beta")
panel = plot_grid(p_cor, psig_prec, Beta_scatter_prec, labels=c("A", "B", "C"), nrow=1)
......
......@@ -4,7 +4,7 @@ library(ggplot2)
setwd("/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization/")
loci_id = 2
masking_type = "global_masking"
masking_type = "random_masking"
sample_size = fread('/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/external/meta_data/N_effective.csv')
correlation_list = list()
......
......@@ -31,8 +31,6 @@ z_col_var =grep("_Z_Var_imputed$", names(loci_all), value=TRUE)
sapply(Z_cols, get_study)
Z_score = loci_all[,Z_cols, with=FALSE]
Z_score_var = loci_all[,z_col_var, with=FALSE]
Z_score_imputed = loci_all[,Z_col_imputed, with=FALSE]
......@@ -44,7 +42,6 @@ compute_meta_analysis <- function(x, min_samp_size=0){
study = get_study(nm)
if((!is.na(x[nm]) & !is.na(sample_size[study, N_effective])))
{
if((sample_size[study, N_effective>min_samp_size])){
meta_denominator = meta_denominator + x[nm] * sample_size[study, N_effective^0.5]
meta_numerator = meta_numerator + sample_size[study, N_effective]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment