Skip to content
Snippets Groups Projects
Commit 11560560 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

results from simulation

parent f76f8cd3
No related branches found
No related tags found
No related merge requests found
......@@ -45,6 +45,7 @@ Project Organization
│ │
│ ├── features
│ │ └── add_hg_37_pos.py : perform liftover to hg37 (ad GnomAD is in hg37)
| | |__ retrieve_filled_out.py : Retrieve european studies with more than 80% of filled out SNPS
│ │
│ ├── models
│ │ │
......@@ -55,9 +56,7 @@ Project Organization
impute Zscores using simulated data : mask 50 SNPs on
200, reimpute then save results assuming different sample size (based on hgcovid consortium or always 100)
|__ Impution_real_data.py
|__ Imputation_test_real_data.py : mask 10% of SNPs around significant loci and impute them back
│ │
│ └── visualization <- Scripts to create exploratory and results oriented visualizations
│ └── Draw_LD.R : Draw the LD matrix used to simulate Data
......@@ -65,6 +64,7 @@ Project Organization
| (intrinsic variability due to sample size)
| |__ Draw_signal_variability.R : draw imputed signal
| ( variability due to sample size + imputation error)
|_ Imputation_strategy_real_data.R : Compare meta_analysis based on original Z-scores vs Imputed Z-scores
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
......
reports/figures/meta_analysis_perf.png

29.8 KiB | W: | H:

reports/figures/meta_analysis_perf.png

25.5 KiB | W: | H:

reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
reports/figures/meta_analysis_perf.png
  • 2-up
  • Swipe
  • Onion skin
"","correlation","Max_L1_error","Mean_L1_error"
"meta_Z_imputed",0.995683217500374,1.22529552037174,0.0562121582752216
"meta_Z_imputed_R2weight",0.990440000978169,1.59426643449452,0.454110616671281
"meta_Z_imputed_high_samp",0.989021574001197,3.62404793904812,1.07203953928316
"meta_Z_imputed_valid",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_R2weight",0.996024141348819,1.22529552037174,0.05602862121342
"meta_Z_imputed_valid_high_samp",0.995289648693123,3.36583792058164,1.0635503576084
"meta_Z_imputed",0.997715664577698,2.10996809407546,0.0675865516980447
"meta_Z_imputed_R2weight",0.995827954783326,5.18573176225036,0.430368355995472
"meta_Z_imputed_high_samp",0.995382502246784,14.0988026398761,1.05536810725203
"meta_Z_imputed_valid",0.998172604572373,2.10996809407546,0.0660996762793101
"meta_Z_imputed_valid_R2weight",0.998172604572373,2.10996809407546,0.0660996762793099
"meta_Z_imputed_valid_high_samp",0.997893748220566,14.0588918701583,1.09929561392649
......@@ -356,3 +356,37 @@
"355","BoSCO_EUR_Z",0.985476354338373,0.0956705476111556,0.807414380574432,26
"356","Amsterdam_UMC_COVID_study_group_EUR_Z",0.968965449672844,0.128762104108945,0.798285013504903,26
"357","meta_Z",0.976898019792069,0.113298175037217,1.0469342369506,26
"358","HOSTAGE_EUR_Z",0.991328644870294,0.141343612759392,0.160127095950146,27
"359","GHS_Freeze_145_EUR_Z",0.99766767575791,0.0749037098892356,0.228020291817981,27
"360","UKBB_EUR_Z",0.997094759585677,0.0515461004095718,0.160799166711033,27
"361","GENCOVID_EUR_Z",0.909612515827131,0.136975421410257,0.435332338970176,27
"362","BelCovid_EUR_Z",0.996923577061899,0.0584577391022102,0.0744064871095851,27
"363","23ANDME_EUR_Z",0.998858805300257,0.0639462153774435,0.100111024149858,27
"364","SweCovid_EUR_Z",0.984107078760595,0.122312852774997,0.288453118204419,27
"365","BQC19_EUR_Z",0.991219808057431,0.0947491992326761,0.351113880427233,27
"366","idipaz24genetics_EUR_Z",0.978086957636132,0.149899608436772,0.441550907259962,27
"367","ANCESTRY_Freeze_Four_EUR_Z",0.986168329804986,0.0925800491471221,0.316496190672388,27
"368","EstBB_EUR_Z",0.987555385124586,0.0286432287004613,0.0394833486854393,27
"369","Generation_Scotland_EUR_Z",0.984925142802746,0.0716150287562398,0.142388724433706,27
"370","DECODE_EUR_Z",0.999718201250732,0.0512618437741581,0.0795751464786553,27
"371","MVP_EUR_Z",0.999129826194626,0.0443966500674167,0.130271603592105,27
"372","BoSCO_EUR_Z",0.998222783557483,0.0359005277769725,0.0521612337125028,27
"373","Amsterdam_UMC_COVID_study_group_EUR_Z",0.911884325479305,0.0970165938865812,0.21003961291722,27
"374","meta_Z",0.996714601848827,0.054356164774863,0.116558759040066,27
"375","HOSTAGE_EUR_Z",0.996746629790268,0.103698167140976,0.193617866467096,28
"376","GHS_Freeze_145_EUR_Z",0.986706077331538,0.192115033844255,0.419428737445626,28
"377","UKBB_EUR_Z",0.999728018960211,0.0208145007407018,0.0297093426862298,28
"378","GENCOVID_EUR_Z",0.999505981199744,0.0673015356139549,0.108717238573582,28
"379","BelCovid_EUR_Z",0.999706986330761,0.0776637753057837,0.117534253598823,28
"380","23ANDME_EUR_Z",0.996021892955152,0.103335926547659,0.275330429767461,28
"381","SweCovid_EUR_Z",0.997928662876803,0.0362859206230599,0.104399152792451,28
"382","BQC19_EUR_Z",0.996417131989554,0.0504240096598093,0.106532510727776,28
"383","idipaz24genetics_EUR_Z",0.991619880579089,0.064726198305787,0.159112901032828,28
"384","ANCESTRY_Freeze_Four_EUR_Z",0.984404222546479,0.0682552263548015,0.101730447077309,28
"385","EstBB_EUR_Z",0.999916106361675,0.0159565059447764,0.029138947868637,28
"386","Generation_Scotland_EUR_Z",0.993150020263495,0.0692212245736399,0.166621314818111,28
"387","DECODE_EUR_Z",0.999905560185049,0.0384715385041048,0.0738413955579982,28
"388","MVP_EUR_Z",0.999939041634757,0.0395354830763295,0.0699736515063329,28
"389","BoSCO_EUR_Z",0.953628275945103,0.127214268108066,0.380480757814329,28
"390","Amsterdam_UMC_COVID_study_group_EUR_Z",0.98314126972152,0.186044808227531,0.385032944988245,28
"391","meta_Z",0.993455976919658,0.10198685402926,0.23546573006364,28
......@@ -4,7 +4,8 @@ library(cowplot)
setwd("/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization")
cohort=""
tag="one_causal"
for(cohort in c("", "_small_cohort")){
N_eff = fread(paste0("../../data/external/meta_data/N_effective",cohort,".csv"))
N_eff = as.data.frame(N_eff)
......@@ -13,27 +14,36 @@ for(cohort in c("", "_small_cohort")){
for( tag in c("null","one_causal", 'two_opposite', 'two_causal')){
imp_file = paste0("../../data/processed/Simulated/Imputed/Imputed",cohort,"_",tag,".csv")
Zscores_file = paste0("../../data/processed/Simulated/Zscores/Zscore",cohort,"_",tag,".csv")
Imputation = fread(imp_file)
Zscores = fread(Zscores_file)
Imputation$V1 = as.character(Imputation$V1)
Zscores$V1 = as.character(Zscores$V1)
setkey(Zscores, V1)
setkey(Imputation, V1)
ID = intersect(Zscores$V1, Imputation$V1)
Zscores = Zscores[ID,]
cor_ref = data.frame(correlation = cor(Imputation[,which(grepl("Z_",names(Imputation))), with=FALSE])[,1])
cor_ref = data.frame(correlation = diag(cor(Imputation[ID,grep("Z_",names(Zscores), value=TRUE), with=FALSE],
Zscores[Imputation$V1, grep("Z_",names(Zscores), value=TRUE),with=FALSE])))
cor_ref["N"] = N_eff[row.names(cor_ref), "N_effective"]
head(cor_ref)
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal()
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal() +ylim(c(0,1))
p_cor
beta_hat = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=1)
psig = ggplot(beta_hat, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig =psig + theme_minimal() + xlab("snp") + ylab("beta")
beta_hat_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 50,"study"])), with=FALSE], id.vars=1)
beta_hat_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$study!="","study"])), with=FALSE], id.vars=1)
psig_prec = ggplot(beta_hat_prec, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat_prec[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig_prec = psig_prec + theme_minimal() + xlab("snp") + ylab("beta")
Beta_scatter = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=c(1,2))
ggplot(Beta_scatter, aes(x=Beta_ref, y=value))+geom_point()
Beta_scatter_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 50,"study"])), with=FALSE], id.vars=c(1,2))
Beta_scatter_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$study!="","study"])), with=FALSE], id.vars=c(1,2))
Beta_scatter_prec = ggplot(Beta_scatter_prec, aes(x=Beta_ref, y=value, color=variable))+geom_point() + scale_colour_hue() + theme_minimal()+theme(legend.pos="none") + xlab('Beta') + ylab("imputed Beta")
panel = plot_grid(p_cor, psig_prec, Beta_scatter_prec, labels=c("A", "B", "C"), nrow=1)
......
......@@ -4,7 +4,7 @@ library(ggplot2)
setwd("/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization/")
loci_id = 2
masking_type = "global_masking"
masking_type = "random_masking"
sample_size = fread('/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/external/meta_data/N_effective.csv')
correlation_list = list()
......
......@@ -31,8 +31,6 @@ z_col_var =grep("_Z_Var_imputed$", names(loci_all), value=TRUE)
sapply(Z_cols, get_study)
Z_score = loci_all[,Z_cols, with=FALSE]
Z_score_var = loci_all[,z_col_var, with=FALSE]
Z_score_imputed = loci_all[,Z_col_imputed, with=FALSE]
......@@ -44,7 +42,6 @@ compute_meta_analysis <- function(x, min_samp_size=0){
study = get_study(nm)
if((!is.na(x[nm]) & !is.na(sample_size[study, N_effective])))
{
if((sample_size[study, N_effective>min_samp_size])){
meta_denominator = meta_denominator + x[nm] * sample_size[study, N_effective^0.5]
meta_numerator = meta_numerator + sample_size[study, N_effective]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment