Commit b9936427 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

first simulation + plot

parent 18917981
...@@ -82,7 +82,7 @@ if __name__ == '__main__': ...@@ -82,7 +82,7 @@ if __name__ == '__main__':
Ssize["per"] = Ssize.n_cases / (Ssize.n_cases + Ssize.n_controls) Ssize["per"] = Ssize.n_cases / (Ssize.n_cases + Ssize.n_controls)
Ssize["N_effective"] = np.floor(Ssize.per * (1- Ssize.per) * (Ssize.n_cases + Ssize.n_controls)) Ssize["N_effective"] = np.floor(Ssize.per * (1- Ssize.per) * (Ssize.n_cases + Ssize.n_controls))
Ssize.N_effective = Ssize.N_effective.astype(int, copy=False) Ssize.N_effective = Ssize.N_effective.astype(int, copy=False)
Ssize.to_csv("../../data/external/meta_data/N_effective.csv")
# get the type of simulation from command line # get the type of simulation from command line
tag = "one_causal" # possibles values are two_opposite, two_causal, one_causal tag = "one_causal" # possibles values are two_opposite, two_causal, one_causal
for tag in ['two_opposite', 'two_causal', 'one_causal']: for tag in ['two_opposite', 'two_causal', 'one_causal']:
...@@ -127,6 +127,5 @@ if __name__ == '__main__': ...@@ -127,6 +127,5 @@ if __name__ == '__main__':
All_Zscore["Z_{0}".format(study)] = Zscore All_Zscore["Z_{0}".format(study)] = Zscore
All_Zscore["Beta_{0}".format(study)] = Zscore / X_study.shape[0]**0.5 All_Zscore["Beta_{0}".format(study)] = Zscore / X_study.shape[0]**0.5
print(All_Zscore.head())
All_Zscore.reset_index().to_csv("../../data/processed/Simulated/Zscores/Zscore_{0}.csv".format(tag)) All_Zscore.to_csv("../../data/processed/Simulated/Zscores/Zscore_{0}.csv".format(tag))
library(Matrix) library(Matrix)
library(bindata) library(bindata)
nvar = 2000 # Number of SNPs nvar = 200 # Number of SNPs
P = runif(nvar, 0.1, 0.9) P = runif(nvar, 0.1, 0.9)
...@@ -19,7 +19,7 @@ for( i in 1:(nvar-1)){ ...@@ -19,7 +19,7 @@ for( i in 1:(nvar-1)){
p11_range = get_possible_p11(p1,p2) p11_range = get_possible_p11(p1,p2)
print(p11_range) print(p11_range)
p11 = p11_range[1] + (p11_range[2] - p11_range[1]) * 0.55 p11 = p11_range[1] + (p11_range[2] - p11_range[1]) * 0.6
C12 = p11 - p1 * p2 C12 = p11 - p1 * p2
cov_mat[i, j] = C12 cov_mat[i, j] = C12
...@@ -32,24 +32,27 @@ CC <- cov2cor(cov_mat) ...@@ -32,24 +32,27 @@ CC <- cov2cor(cov_mat)
## Simulate 10000 x-y pairs, and check that they have the specified ## Simulate 10000 x-y pairs, and check that they have the specified
## correlation structure ## correlation structure
larg=30 larg=50
dec_mat = matrix(rep(0, nvar^2), ncol=nvar) dec_mat = matrix(rep(0, nvar^2), ncol=nvar)
dec <- exp(seq(0,-4, len=larg+1)) dec <- exp(seq(0,-3, len=larg+1))
for(i in 1:nvar){ for(i in 1:nvar){
upb = min(i+larg, nvar) upb = min(i+larg, nvar)
dec_mat[i, i:upb] = dec[1:(upb-i+1)] dec_mat[i, i:upb] = dec[1:(upb-i+1)]
} }
print(CC[1:larg, 1:larg])
dec_mat = dec_mat + t(dec_mat) dec_mat = dec_mat + t(dec_mat)
diag(dec_mat) = 1 diag(dec_mat) = 1
CC_dec =CC*dec_mat CC_dec =CC*dec_mat
print(CC_dec[1:larg, 1:larg])
bincor = bincorr2commonprob(P,CC_dec) bincor = bincorr2commonprob(P,CC_dec)
Nind=16000 Nind=16000
print("## Generate genotype : ") print("## Generate genotype 1: ")
x1 <- rmvbin(Nind, margprob = P, bincorr = CC_dec) x1 <- rmvbin(Nind, margprob = P, bincorr = CC_dec)
print("## Generate genotype 2: ")
x2 <- rmvbin(Nind, margprob = P, bincorr = CC_dec) x2 <- rmvbin(Nind, margprob = P, bincorr = CC_dec)
print("## save genotype:") print("## save genotype:")
write.table(x1 + x2, "../../data/processed/Simulated/Genotypes/genotype2.csv", sep="\t") write.table(x1 + x2, "../../data/processed/Simulated/Genotypes/genotype2.csv", sep="\t")
......
# -*- coding: utf-8 -*-
import click
import logging
from pathlib import Path
from dotenv import find_dotenv, load_dotenv
@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
""" Runs data processing scripts to turn raw data from (../raw) into
cleaned data ready to be analyzed (saved in ../processed).
"""
logger = logging.getLogger(__name__)
logger.info('making final data set from raw data')
if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)
# not used in this stub but often useful for finding various files
project_dir = Path(__file__).resolve().parents[2]
# find .env automagically by walking up directories until it's found, then
# load up the .env entries as environment variables
load_dotenv(find_dotenv())
main()
library(data.table)
library(ggplot2)
library(cowplot)
setwd("/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization")
N_eff = fread("../../data/external/meta_data/N_effective.csv")
Imputation = fread("../../data/processed/Simulated/Imputed/Imputed_one_causal.csv")
Imputation
N_eff = as.data.frame(N_eff)
row.names(N_eff) = paste0("Z_",N_eff$Name)
N_eff[order(N_eff$N_effective),]
cor_ref = data.frame(correlation = cor(Imputation[,which(grepl("Z_",names(Imputation))), with=FALSE])[,1])
cor_ref
cor_ref["N"] = N_eff[row.names(cor_ref), "N_effective"]
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal()
p_cor
beta_hat = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=1)
beta_hat
psig = ggplot(beta_hat, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig =psig + theme_minimal() + xlab("snp") + ylab("beta")
psig
beta_hat_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 100,"Name"])), with=FALSE], id.vars=1)
psig_prec = ggplot(beta_hat_prec, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat_prec[variable=="Beta_ref",], color="midnightblue",lwd=1.1)
psig_prec = psig_prec + theme_minimal() + xlab("snp") + ylab("beta")
psig_prec
Beta_scatter = melt(Imputation[,c(1,which(grepl("Beta_",names(Imputation)))), with=FALSE], id.vars=c(1,2))
ggplot(Beta_scatter, aes(x=Beta_ref, y=value))+geom_point()
Beta_scatter_prec = melt(Imputation[,c("V1","Beta_ref",paste0("Beta_",N_eff[N_eff$N_effective > 100,"Name"])), with=FALSE], id.vars=c(1,2))
Beta_scatter_prec = ggplot(Beta_scatter_prec, aes(x=Beta_ref, y=value, color=variable))+geom_point() + scale_colour_hue() + theme_minimal()+theme(legend.pos="none") + xlab('Beta') + ylab("imputed Beta")
png("../../reports/figures/Signal_variability_imputed.png", width=900, height=325)
plot_grid(p_cor, psig_prec, Beta_scatter_prec, labels=c("A", "B", "C"), nrow=1)
dev.off()
library(data.table)
require(lattice)
library(viridisLite)
setwd("/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization")
LD = fread("../../data/processed/Simulated/Genotypes/LD_matrix2.csv")
LD_mat = as.matrix(LD[,-1])
dimnames(LD_mat)[[2]] = NULL
coul <- plasma(10)
bk = seq(0,1, len=11)
png("../../reports/figures/LD_simulated.png")
levelplot(abs(LD_mat), col.regions = coul, pretty=TRUE, ylab="", xlab="", at = bk)
dev.off()
library(data.table)
library(ggplot2)
library(cowplot)
setwd("/mnt/zeus/GGS/PROJECT_imputation_covidhg/hgcovid_imputation/src/visualization")
N_eff = fread("../../data/external/meta_data/N_effective.csv")
Zscores = fread("../../data/processed/Simulated/Zscores/Zscore_one_causal.csv")
N_eff = as.data.frame(N_eff)
row.names(N_eff) = paste0("Z_",N_eff$Name)
cor_ref = data.frame(correlation = cor(Zscores[,which(grepl("Z_",names(Zscores))), with=FALSE])[,1])
cor_ref["N"] = N_eff[row.names(cor_ref), "N_effective"]
p_cor = ggplot(cor_ref, aes(x=N, y=correlation)) + geom_line() + geom_point() + theme_minimal()
beta_hat = melt(Zscores[,c(1,which(grepl("Beta_",names(Zscores)))), with=FALSE], id.vars=1)
psig = ggplot(beta_hat, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat[variable=="Beta_meta-analysis",], color="midnightblue",lwd=1.1)
psig =psig + theme_minimal() + xlab("snp") + ylab("beta")
beta_hat_prec = melt(Zscores[,c("V1","Beta_meta-analysis",paste0("Beta_",N_eff[N_eff$N_effective > 500,"Name"])), with=FALSE], id.vars=1)
psig_prec = ggplot(beta_hat_prec, aes(x=V1, y=abs(value), group=variable)) + geom_line(alpha=0.2) + geom_line(data=beta_hat_prec[variable=="Beta_meta-analysis",], color="midnightblue",lwd=1.1)
psig_prec = psig_prec + theme_minimal() + xlab("snp") + ylab("beta")
png("../../reports/figures/Signal_variability_simulated.png", width=900, height=325)
plot_grid(p_cor, psig, psig_prec, labels=c("A", "B", "C"), nrow=1)
dev.off()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment