diff --git a/Figures_manuscript/scripts/Fig_SUPnote4_simulation_test_boundaries.R b/Figures_manuscript/scripts/Fig_SUPnote4_simulation_test_boundaries.R new file mode 100644 index 0000000000000000000000000000000000000000..274f2793a3d739f64b16a68526bc0950b4f629fb --- /dev/null +++ b/Figures_manuscript/scripts/Fig_SUPnote4_simulation_test_boundaries.R @@ -0,0 +1,171 @@ +library(MASS) +library(ggplot2) +library(data.table) + +h1 = 0.4 +h2 = 0.4 +M = 1000 + +pgm= 0.4 * min(h1,h1) # unused +pgh = 0.5 * min(h1,h2) + +epsgh = matrix(c(h1, pgh, pgh, h2), ncol=2)/ M +epsgoh = matrix(c(h1, -pgh, -pgh, h2), ncol=2)/ M + +# heritable trait but not correlated +epsh = matrix(c(h1, 0, 0, h2), ncol=2)/M + +epshlow = matrix(c(0.1, 0, 0, 0.1), ncol=2)/M + +epsg1 = matrix(c(h1, pgh, pgh, h2), ncol=2)/ M +epsg2 = matrix(c(h1, -0.2, -0.2, h2), ncol=2)/ M + +# draw genetic effect according to several scenarios +BG_mat <-list() +#set.seed(1234) +#BG_mat <- append(BG_mat, list("G_cov_low_het_null_Gcor" = data.frame(mvrnorm(M, c(0,0),epshlow)))) # genetic effects with zero genetic correlation between traits (with low heritability) +set.seed(1234) +BG_mat <- append(BG_mat, list("G_cov_high"= data.frame(mvrnorm(M, c(0,0),epsgh)))) # genetic effects with a given positive genetic covariance +#set.seed(1234) +#BG_mat <- append(BG_mat, list("G_cov_heritable_null_Gcor" = data.frame(mvrnorm(M, c(0,0),epsh)))) # genetic effects with zero genetic correlation between traits +#set.seed(1234) +#BG_mat <- append(BG_mat, list("G_cov_opp_high"= data.frame(mvrnorm(M, c(0,0),epsgoh)))) # genetic effects with a given negative genetic covariance +#set.seed(1234) +#BG_mat <- append(BG_mat, list("G_bicov" = data.frame(rbind(mvrnorm(M/2, c(0,0),epsg1), mvrnorm(M/2, c(0,0), epsg2))))) # genetic effects with a given mixed (local) genetic covariance + + +sumZ <- function(Z, w){ return((as.matrix(Z) %*% matrix(w))^2 /(as.matrix(t(w))%*%sigr %*%as.matrix(w))[1,1])} # unused +Univ <- function(Z){max(abs(Z))} + +hypothesis ="G_cov_high" # unused +N= 10000 # unused +overlap= 0.5 # unused +sign_threshold = 5 *(10^-8) #/M +C_env = seq(-0.9, 0.9, len=5) # Control the environmental correlation. +Ns_frac = c(1, 0, 0.5) # Sample overlap +N_samp = c(50000) + + +ce=0.9 # unused + +for(hypothesis in names(BG_mat)){ + print(hypothesis) + + if(hypothesis=="null"){ + h1 = 0 + h2 = 0}else{ + h1 = 0.4 + h2 = 0.4 + } + + for(overlap in Ns_frac){ + simu = 1 + D_pval = data.frame(matrix(NA, nrow=length(N_samp)* length(C_env), ncol=4)) + names(D_pval) = c("N_samp", "env_cor","Omnibus", "Univ") + + for(ce in C_env){ + + pe = ce * min((1-h1), (1-h2)) + eps = matrix(c(1-h1, pe, pe, 1-h2), ncol=2) # correlation in environmental and non-additive genetic effects between two traits + + Beta = BG_mat[[hypothesis]] + # Genetic effects for the two trait + print(ce) + print(simu) + + for(N in N_samp){ + X1 = matrix(0, nrow=N, ncol=M) + X2 = matrix(0, nrow=N, ncol=M) + + D_pval[simu,"N_samp"] = N + D_pval[simu,"env_cor"] = ce + # Draw SNPs : + MAF = runif(M,0.05, 0.95) + i= 1 + + for(p in MAF){ + X1[,i] = rbinom(N, 2, p) + X2[,i] = rbinom(N, 2, p) + i = i +1 + } + + NS = dim(X1)[1] * overlap + + X2[1:NS, ] = X1[1:NS,] + + X1 = scale(X1) + X2 = scale(X2) + + set.seed(4321) + env1 = mvrnorm(N, c(0,0), eps) + env2 = mvrnorm(N, c(0,0), eps) + env2[1:NS, ] = env1[1:NS,] + + Y1 = as.matrix(X1) %*% Beta[,1] + env1[,1] + Y2 =as.matrix(X2) %*% Beta[,2] + env2[,2] + + + rho_ov= cor(Y1, Y2) * overlap # cor(Y1,Y2)=rho (rho=pe+pg); overlap=NS/N (fraction of overlapping samples out of all samples) + + D = data.frame(matrix(c(Y1, Y2), ncol = 2)) + png(paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/Phenotype_", hypothesis,"_ov_",overlap,".png")) + p = ggplot(D, aes(x=Y1, y=Y2)) + geom_point() + print(p) + dev.off() + + Beta_estimate1 = t((t(Y1) %*% X1) %*% solve(t(X1) %*% X1, tol=10^-25)) + Beta_estimate2 = t((t(Y2) %*% X2) %*% solve(t(X2) %*% X2, tol=10^-25)) + + Beta_est = data.frame(B1 = Beta_estimate1, B2=Beta_estimate2) + + png(paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/Beta_", hypothesis,"_ov_",overlap,".png")) + print(ggplot(Beta_est, aes(x=B1, y=B2)) + geom_point()) + dev.off() + Z = Beta_est + + Z[,1] = Beta_est[,1] *sqrt(N) + Z[,2] = Beta_est[,2] *sqrt(N) + Z = as.data.frame(Z) + names(Z) = c("z1", "z2") + + png(paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/Z_", hypothesis,"_ov_",overlap,".png")) + print(ggplot(Z, aes(x=z1, y = z2)) + geom_point(alpha=0.5, size=1, color="blue")) + dev.off() + # + sigr = matrix(c(1,0,0,1),nrow=2,ncol=2) + sigr[1,2 ] = rho_ov + sigr[2, 1] = rho_ov + sigr + # Omnibus + Omni_stat= diag(as.matrix(Z[,1:2]) %*% solve(sigr) %*% as.matrix(t(Z[,1:2]))) + pval = 1-pchisq(Omni_stat, df=2) + + D_pval[simu, "Omnibus"]= mean(pval < sign_threshold) + Z["Omnibus"] = pval + + # Stat Univariate test + + stat = apply(Z[,1:2], 1, Univ) + pval = 1-pnorm(stat) + D_pval[simu,"Univ"] = mean(pval < sign_threshold) + Z["Univ"] = pval + + Z["Significance status"] = "None" + Z[(Z$Univ < sign_threshold) & (Z$Omnibus < sign_threshold) , "Significance status"] = "Both" + Z[(Z$Univ < sign_threshold) & (Z$Omnibus > sign_threshold) , "Significance status"] = "Univariate" + Z[(Z$Univ > sign_threshold) & (Z$Omnibus < sign_threshold) , "Significance status"] = "Omnibus" + + range_z <- max(abs(Z$z1),abs(Z$z2)) + + write.table(Z, paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/Z_scores_", hypothesis, "_ov_", overlap,"_N_", N,"ce_",ce, ".csv"), sep="\t", row.names=TRUE) + + png(paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/", hypothesis, "_ov_", overlap,"_N_", N,"ce_",ce, ".png"), width=1500, height=1500, res=300) + p = ggplot(Z , id.var=c("z1", "z2"), aes(x=z1, y=z2, color=`Significance status`)) + geom_point(size=1.5) + scale_colour_manual(values = c("#3ba3ec","grey", "#f77189","#50b131")) + xlim(-range_z,range_z) + ylim(-range_z,range_z) + print(p + theme_minimal()+ theme(legend.position = "top", panel.spacing = unit(1.5, "lines"), text=element_text(size=16)) + labs(color="Significance Status")) + dev.off() + simu = simu+1 + } + } + write.table(D_pval, paste0("/pasteur/zeus/projets/p02/GGS_JASS/5._ARTICLE_DISCOVERABILITY/Figures/outputs/simulations/", hypothesis, "_ov_", overlap,".csv"), sep="\t", row.names=TRUE) + } +}