Commit 4145bf14 authored by Hanna  JULIENNE's avatar Hanna JULIENNE

replicates in simulation

parent f845f3cd
Pipeline #11549 passed with stages
in 1 minute and 7 seconds
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -7,6 +7,7 @@ import seaborn as sns
import scipy
import raiss
import os
import sys
def exponential_decay(nsnp):
exp_decay = np.zeros((nsnp, nsnp))
......@@ -62,48 +63,93 @@ def generate_two_opposite(amplitude, LD_cor, X):
Zscore = compute_Zscore(Y, X)
return({"causal":beta, "Zscore":Zscore})
def get_cor(rd, Zscore, LD_cor, ids_known, ids_masked):
def define_signal_generator(tag):
if tag=="one_causal":
return generate_one_causal
if tag=="two_causal":
return generate_two_causal
if tag=="two_opposite":
return generate_two_opposite
def get_perf(rd, Zscore, LD_cor, ids_known, ids_masked):
imputed = raiss.stat_models.raiss_model(Zscore[ids_known], pd.DataFrame(LD_cor[ids_known,:][:,ids_known]), LD_cor[ids_masked,:][:,ids_known], rcond=rd)
cor_perf = np.corrcoef(imputed['mu'], Zscore[ids_masked])[0,1]
return(cor_perf)
MAE = np.linalg.norm(imputed['mu'] - Zscore[ids_masked], 1) / len(ids_masked) # L1- loss
id_large = np.where(np.abs(Zscore[ids_masked]) > 6)[0]
cor_large = np.corrcoef(imputed['mu'][id_large], Zscore[ids_masked][id_large])[0,1]
MAE_large = np.linalg.norm(imputed['mu'][id_large] - Zscore[ids_masked][id_large], 1) / len(id_large)
return({'cor':cor_perf, "MAE": MAE, "cor_large":cor_large, "MAE_large":MAE_large})
# Get best rd
def best_rd(zscore, LD_cor, ids_known, ids_masked):
rd_list = np.linspace(0.001,0.75, 100)
cor_perf = pd.Series(index=rd_list)
MAE_perf = pd.Series(index=rd_list)
cor_large = pd.Series(index=rd_list)
MAE_large = pd.Series(index=rd_list)
for rd in rd_list:
cor_perf[rd] = get_cor(rd, zscore, LD_cor, ids_known, ids_masked)
perf = get_perf(rd, zscore, LD_cor, ids_known, ids_masked)
cor_perf[rd] = perf["cor"]
MAE_perf[rd] = perf["MAE"]
cor_large[rd] = perf["cor_large"]
MAE_large[rd] = perf["MAE_large"]
return({"correlation": cor_perf[np.argmax(cor_perf)],
"MAE": MAE_perf[np.argmax(cor_perf)],
"correlation_large": cor_large[np.argmax(cor_perf)],
"MAE_large": MAE_large[np.argmax(cor_perf)],
"rcond":np.argmax(cor_perf)})
if __name__=="__main__":
# get the type of simulation from command line
tag = sys.argv[1] # possibles values are two_opposite, two_causal, one_causal
print("Simulating values for {}".format(tag))
#get genotype
X = pd.read_csv("./data/genotype.csv", sep="\t")
nsnp = X.shape[1]
# Estimate LD from genotype:
LD_cor = X.corr().values
n_rep = 100
n_masked = 40
max_amp = 50
ids_masked = np.random.choice(100, n_masked, replace=False)
ids_known = np.setdiff1d(np.array(range(100)), ids_masked)
amplitude_effect = pd.Series(index=np.arange(0,50,1))
tag = "two_opposite"
for amp in amplitude_effect.index:
print('search of best rd for amplitude {}'.format(amp))
signal = generate_two_opposite(amp, LD_cor, X)
Zscore = signal["Zscore"]
beta = signal["causal"]
if amp==25:
pd.Series(Zscore).to_csv("./results/Zscore_{0}.csv".format(tag))
pd.Series(beta).to_csv("./results/Beta_{0}.csv".format(tag))
res = best_rd(Zscore, LD_cor, ids_known, ids_masked)
print("best rd : {}".format(res['rcond']))
print("Correlation : {}".format(res['correlation']))
amplitude_effect.loc[amp] = res['correlation']
amplitude_effect = pd.DataFrame(index=np.arange(0, n_rep*max_amp, 1), columns = ["amplitude", "correlation", "MAE", "correlation_large", "MAE_large"])
id = 0
for amp in range(max_amp):
for rep in range(n_rep):
print('search of best rd for amplitude {}'.format(amp))
signal_generator = define_signal_generator(tag)
signal = signal_generator(amp, LD_cor, X)
Zscore = signal["Zscore"]
beta = signal["causal"]
if amp==25:
pd.Series(Zscore).to_csv("./results/Zscore_{0}.csv".format(tag))
pd.Series(beta).to_csv("./results/Beta_{0}.csv".format(tag))
res = best_rd(Zscore, LD_cor, ids_known, ids_masked)
print("best rd : {}".format(res['rcond']))
print("Correlation : {}".format(res['correlation']))
print("MAE : {}".format(res['MAE']))
amplitude_effect.loc[id, "amplitude"] = amp
amplitude_effect.loc[id, "correlation"] = res['correlation']
amplitude_effect.loc[id, "MAE"] = res["MAE"]
amplitude_effect.loc[id, "correlation_large"] = res['correlation_large']
amplitude_effect.loc[id, "MAE_large"] = res["MAE_large"]
id = id +1
amplitude_effect.to_csv("./results/amplitude_effect_{0}.csv".format(tag))
......@@ -2,48 +2,106 @@ library("ggplot2")
library("cowplot")
library(data.table)
setwd("~/Project/raiss/Simulation/")
setwd("~/Hanna/Projects/raiss/Simulation/")
plot_list = list()
plot_list_for_reviewer = list()
tag = "one_causal"
for( tag in c("one_positive", "two_positive", "two_opposite"))
for( tag in c("one_causal", "two_causal", "two_opposite"))
{
causal = read.csv(paste0("./results/Beta_",tag,".csv"), header=FALSE)
Zscore = read.csv(paste0("./results/Zscore_",tag,".csv"), header=FALSE)
performance = read.csv(paste0("./results/amplitude_effect_", tag, ".csv"), header=FALSE)
names(performance) = c("amplitude", "correlation")
performance = read.csv(paste0("./results/amplitude_effect_", tag, ".csv"), header=TRUE)
names(performance) = c("id", "amplitude", "correlation", "MAE", "correlation_large", "MAE_large")
performance = data.table(performance)
head(performance)
perf = performance[, .(mean(correlation), sd(correlation), mean(MAE), sd(MAE),
mean(correlation_large), sd(correlation_large), mean(MAE_large), sd(MAE_large), count=.N), by=amplitude]
names(perf) = c("amplitude", "correlation", "sd_cor","MAE", "sd_mae", "correlation_large", "sd_correlation_large", "MAE_large", "sd_MAE_large","count")
names(causal) =c("SNPid", "Beta")
causal["Zscore"] = Zscore[,2] * 25
causal["Zscore"] = Zscore[,2]
c_long = melt(causal, id.var= "SNPid")
p1 = ggplot(c_long, aes(x=SNPid, y=value, color=variable)) + geom_point() + geom_line(size=1)
p1 = p1 +scale_colour_manual(values=c("orangered", 'royalblue1'))
p1 = ggplot(c_long[rev(order(c_long[,2])),], aes(x=SNPid, y=value, color=variable)) + geom_line(size=0.6) +geom_point(size=0.5)
p1 = p1 + scale_colour_manual(values=c("orangered", 'royalblue1'))+ theme(legend.position='top',legend.justification="center")
legend_sig = get_legend(p1)
plot_list[[paste0(tag,"_signal")]] = p1 + ylab("") + theme(legend.position='none')
pzscore = ggplot(causal, aes(x=SNPid, y=Zscore)) + geom_line(size=0.6, color="royalblue1") +geom_point(size=0.5, color="royalblue1")
pcause = ggplot(causal, aes(x=SNPid, y=Beta)) + geom_line(size=0.6, color="orangered") +geom_point(size=0.5, color="orangered")
p2 =ggplot(performance, aes(x=amplitude, y=correlation)) + geom_point() + geom_line(color="royalblue1",size=1)
p= p2 + ylab("")
plot_list[[paste0(tag,"_perf")]] = p2 + ylim(0,1)
legend_sig = get_legend(p1)
if(tag != "one_causal")
{
plot_list[[paste0(tag,"_causal")]] = pcause + ylab("") + theme(legend.position='none')
plot_list[[paste0(tag,"_zscore")]] = pzscore + ylab("") + theme(legend.position='none')
}
else{
plot_list[[paste0(tag,"_causal")]] = pcause
plot_list[[paste0(tag,"_zscore")]] = pzscore
}
p2 =ggplot(performance, aes(x=amplitude, y=correlation)) #geom_point()
p2 = p2 + geom_line(color="royalblue1",size=1)
p2 = p2 + xlab("Effect size")
p2 =ggplot(perf, aes(x=amplitude, y=correlation)) + geom_point(data=performance, aes(x=amplitude, y=correlation), size=0.1, alpha=0.2) + geom_line(color="royalblue1",size=1) + geom_errorbar(aes(ymin=correlation-sd_cor/(count^0.5), ymax=correlation+sd_cor/(count^0.5)),color="royalblue1", position=position_dodge(0.05))
p2 = p2 + ylab("")
p2
if(tag != "one_causal")
{
plot_list[[paste0(tag,"_perf")]] = p2 + ylim(0,1) + ylab("")
}
else
{
plot_list[[paste0(tag,"_perf")]] = p2 + ylim(0,1) + ylab("Correlation")
}
# MAE graph
p3 = ggplot(perf, aes(x=amplitude, y=MAE)) + geom_point(data=performance, aes(x=amplitude, y=MAE), size=0.2) + geom_line(color="royalblue1",size=1) +geom_errorbar(aes(ymin=MAE-sd_mae/(count^0.5), ymax=MAE+sd_mae/(count^0.5)),color="royalblue1", position=position_dodge(0.05))
p3 = p3 + ylab("")+ ylim(0,15)
plot_list_for_reviewer[[paste0(tag, "_MAE")]] = p3
# cor_large graph
p4 = ggplot(perf, aes(x=amplitude, y=correlation_large)) + geom_point(data=performance, aes(x=amplitude, y=correlation_large), size=0.2) + geom_line(color="royalblue1",size=1) +geom_errorbar(aes(ymin=correlation_large-sd_correlation_large/(count^0.5), ymax=correlation_large+sd_correlation_large/(count^0.5)),color="royalblue1", position=position_dodge(0.05))
p4 = p4 + ylab("")+ ylim(0,1)
plot_list_for_reviewer[[paste0(tag, "_cor_large")]] = p4
# MAE_large graph
p5 = ggplot(perf, aes(x=amplitude, y=MAE_large)) + geom_point(data=performance, aes(x=amplitude, y=MAE_large), size=0.2) + geom_line(color="royalblue1",size=1) +geom_errorbar(aes(ymin=MAE_large-sd_MAE_large/(count^0.5), ymax=MAE_large+sd_MAE_large/(count^0.5)),color="royalblue1", position=position_dodge(0.05))
p5 = p5 + ylab("")+ ylim(0,15)
plot_list_for_reviewer[[paste0(tag, "_MAE_large")]] = p5
}
upper_panel = plot_grid(plot_list[[1]],plot_list[[4]],plot_list[[7]],
plot_list[[2]],plot_list[[5]],plot_list[[8]],
align="h", nrow=2)
upper_panel_with_leg = plot_grid(legend_sig, upper_panel, rel_heights=c(0.06, 0.94), nrow=2)
lower_panel = plot_grid(plot_list[[3]],
plot_list[[6]],plot_list[[9]],align="v", nrow=1)
upper_panel = plot_grid(plot_list[[1]],plot_list[[3]],
plot_list[[5]],align="h", nrow=1)
tiff(filename = "Supp_fig_causal2.tiff",
width = 900, height = 900, units = "px", pointsize = 12)
plot_grid(upper_panel_with_leg, lower_panel, rel_heights=c(0.65,0.35), labels=c("A", "B"), label_size=18, nrow=2)
dev.off()
legend_sig
upper_panel_with_leg = plot_grid(upper_panel, legend_sig, rel_widths=c(0.9, 0.1))
lower_panel = plot_grid(plot_list[[2]],
plot_list[[4]],plot_list[[6]],align="v")
upper_panel_with_leg
panel_MAE = plot_list_for_reviewer[[tag]]
panel_neuneu = plot_grid(plot_list_for_reviewer[[1]] + ylab("MAE"),plot_list_for_reviewer[[4]],plot_list_for_reviewer[[7]],
plot_list_for_reviewer[[2]] + ylab("Correlation_on_large"),plot_list_for_reviewer[[5]],plot_list_for_reviewer[[8]],
plot_list_for_reviewer[[3]]+ ylab("MAE_on_large"),plot_list_for_reviewer[[6]],plot_list_for_reviewer[[9]],
align="h", nrow=3)
plot_grid(plot_grid(plot_list[[1]],plot_list[[3]],
plot_list[[5]],plot_list[[2]],
plot_list[[4]],plot_list[[6]],align="v"), legend_sig, rel_widths=c(0.9,0.1))
tiff(filename = "for_reviewer_info.tiff",
width = 900, height = 900, units = "px", pointsize = 12)
panel_neuneu
dev.off()
0,-0.06659720915619761
1,2.577625602858331
2,1.1469867719537352
3,0.9648555901995858
4,-1.3924370603747185
5,0.5455878821217037
6,-0.015622357293898374
7,-0.9057201946209114
8,-1.5259318735326657
9,-0.10667869455962699
10,-1.2700916420885089
11,-1.046906321057954
12,0.7942163337022433
13,-0.03245679015785911
14,0.7539543645494159
15,-2.2062813625753086
16,-0.523913470668452
17,-1.0975441457343296
18,0.9447656433497813
19,0.9659164003866421
20,-1.6447745867353725
21,-0.5711211519644869
22,-0.014545677136408105
23,0.07822991470829127
24,-0.47442335792207235
0,-0.8869402161585944
1,0.6612390872476919
2,-0.8335827545945383
3,0.608329895185916
4,-0.3565991247915173
5,-0.9666506145673204
6,-1.0332139137068788
7,0.12959969600651824
8,-0.5743452686645449
9,-0.819768540048948
10,0.3166240551916395
11,-0.001034096845169535
12,-0.33714874226775554
13,1.1412254000650963
14,0.4609867113704986
15,1.1194474549784614
16,-0.7436053955612772
17,-1.2615122020387117
18,1.48342352546725
19,-0.14155180832367575
20,0.2853813484365789
21,-0.5103104091784904
22,1.7899155116797025
23,0.7943661363432482
24,-0.8446995865754815
25,25.0
26,1.2457631934356035
27,0.8059054840013314
28,0.8900303223444963
29,0.3841071548036905
30,-1.6215483878076464
31,0.9173298270751732
32,2.1051137684873935
33,0.40513711902625016
34,0.5370428010615295
35,-0.8834876082597699
36,0.21738180085521636
37,0.8731468789664147
38,0.48575530966908326
39,-1.198237307311863
40,-1.7440462780635753
41,-0.5459675218352041
42,-0.536219761092213
43,-3.1438583842306893
44,1.9955687530851183
45,-1.1222872494122194
46,-0.6651669782173592
47,-0.027716384451354204
48,-1.2998014938312377
49,0.717038812246902
50,1.1209245666899226
51,-1.8992705682908337
52,1.5167836142117292
53,-0.5090336252462357
54,0.9792280835591411
55,-2.0144005775011884
56,-1.2337823107028651
57,0.1659032601608288
58,-1.7336551243764382
59,0.5586926480155389
60,-0.33982790475564884
61,1.8016859290623504
62,-0.5784727078453189
63,-0.4992335262105399
64,-3.7990169263057147
65,0.10588728433215536
66,0.23014659468555623
67,-1.2680103437917565
68,0.5750392443720826
69,0.19238666227065293
70,1.0874200476696216
71,-0.3086326274079308
72,0.3462897660117345
73,1.4391908791554626
74,1.8332314530856884
26,0.579373108154632
27,-0.5691141496296795
28,1.2354022827422597
29,1.0606573530529075
30,0.8282054911958447
31,0.5935815019730685
32,0.025378418638609332
33,-0.08058290637278757
34,0.10061243556502907
35,-0.5477739696258856
36,2.2965829772221413
37,-0.4027922402181882
38,1.438566133990857
39,-0.496701446017469
40,-0.3868979305572139
41,-0.4108489340675367
42,0.679893702286411
43,0.028864108703664192
44,0.6104729637030691
45,-2.110676614652239
46,0.13411814689389556
47,-1.7161320919584688
48,-0.8471466817831336
49,1.3801224494279605
50,1.0632177893315686
51,-2.1887273818483663
52,-0.1513893110005833
53,-0.8474042472933034
54,0.8506221401022309
55,-0.6931308070482582
56,0.6727423446608372
57,0.1410861211357131
58,-1.6074991492059763
59,-0.16363766257680418
60,2.334849150191125
61,1.531318608250547
62,0.048406088914412906
63,1.6537384178833072
64,-0.419075457161441
65,-0.9744701833462486
66,-0.1311465586657584
67,0.16828715630822416
68,0.726867662105939
69,0.7360017259899292
70,0.6329112884596209
71,-0.22138098441855594
72,-0.8974451783257323
73,-0.797744271469423
74,0.06433133330985821
75,-25.0
76,0.3652614566618937
77,-0.34900281219013435
78,-0.9652120468306805
79,-1.0447589531171915
80,0.5735731331651008
81,0.14169334098646774
82,-0.519204208763682
83,-1.269831277479186
84,-1.1915633538863528
85,-0.011319713058671273
86,0.5435308830413256
87,0.5771773758273387
88,1.0295011147193367
89,1.6421990722822706
90,0.604588084047694
91,-0.7016264979212866
92,0.8169577827531166
93,-2.5362075786550204
94,-0.18629922258046866
95,-0.07287076494534865
96,-0.643586261250713
97,-0.6940939108748988
98,-1.1641243739169445
99,0.5843849227661781
76,-0.41806935699550474
77,-0.4909744151454624
78,-0.7183804900013461
79,-2.909689953602784
80,-0.05869701473301918
81,-0.47775701750714283
82,-0.9009327316093898
83,0.7023474585701137
84,0.13278437005378263
85,0.30536077272971385
86,0.8861855509074544
87,0.4193405912102993
88,-1.8960155653691282
89,-0.024086326106496192
90,-1.4967541399935167
91,-0.26099653663054495
92,0.31317998973730043
93,0.965000347623915
94,0.4172043647473007
95,0.16142085830463437
96,-0.3263661457564578
97,0.716929978261094
98,-0.16762360860595252
99,-0.2089843253852639
0,0.3196138303487168
1,2.9222403676378095
2,0.02335979092855516
3,-1.6581604742032665
4,-3.1401942882155023
5,-2.6875144826360597
6,-0.5447146785895811
7,-8.450514658043442
8,-4.882055261181963
9,-5.87009507636448
10,-6.681414120641463
11,-4.890242014014117
12,-3.1901110770984786
13,1.1418161805614622
14,-4.920061803752234
15,-7.7676739694486505
16,18.911115732693176
17,-7.141529797408189
18,15.52533555418591
19,19.349851982482704
20,22.479734077777906
21,26.36734098669195
22,12.848656704487318
23,23.5568150101544
24,12.02894899240281
25,70.9214277977418
26,36.79498785050427
27,1.1822200026499305
28,18.149683881027855
29,30.588269444979005
30,19.756354145390798
31,21.279520423282985
32,20.696477421708614
33,9.53005456384433
34,21.859647022651938
35,16.945502402240994
36,1.7334686584035295
37,2.0279202605246858
38,0.3761043910633778
39,-2.231654125193293
40,-5.071138768117451
41,-2.396616389560372
42,-2.009310980573785
43,-7.928596170868826
44,1.1151568663111182
45,-7.18551059975913
46,-5.2746447504079175
47,-6.563658661693237
48,-7.873150657003318
49,-4.312495888199843
50,-2.1067992961613293
51,-5.168192165085923
52,2.663528045302078
53,-1.878736710373114
54,-2.6189434311319686
55,-6.7845303166679845
56,-0.6519411197219013
57,2.673154948158105
58,-3.145573434331382
59,0.2765355118537216
60,-3.637440059831687
61,1.0575986396494692
62,-1.1698539380235426
63,-1.1529614282076786
64,-6.050239828414269
65,-1.3264882241299039
66,3.285520093694321
67,-6.166348776637403
68,-20.280627737432095
69,-19.928914523197292
70,-18.925362851836002
71,-3.4329611857650204
72,-12.599711387478516
73,3.581371419755567
74,-31.733061087268
75,-65.39850323626015
76,-8.745472269194595
77,0.1887074692391056
78,-35.78469554133108
79,-28.221660450860558
80,-16.90007167206915
81,2.9730522576862435
82,-23.407643913198502
83,-6.919131455955349
84,-12.204685427922394
85,0.8557313521749763
86,-1.8758447145236052
87,-2.5269719322799427
88,-2.463634994514339
89,-0.35519174086399263
90,-2.533421420184964
91,-2.044312639964267
92,-1.8549123514138548
93,-6.039313273591797
94,-3.318307849097539
95,-2.3412986205423514
96,2.6133579373576548
97,-1.3600697018932362
98,-2.651928505155647
99,-1.8435827516203644
0,-3.095525062540547
1,-2.0394874538435586
2,-2.9856638042356756
3,-1.5690555188027293
4,-0.9917356944553491
5,-6.9233426279333194
6,-4.313352078108361
7,-3.1003176324515898
8,-2.242928676915172
9,-3.128921956835766
10,0.08875851874822568
11,1.5164008344821744
12,0.5217168623481246
13,4.879539443275154
14,0.8207325852675359
15,2.4010326580960872
16,22.177880164815235
17,-0.6085494351165335
18,19.20746937090012
19,21.103557814353564
20,29.9258262205742
21,31.148938068406483
22,19.263276638056766
23,28.723793279271863
24,15.31132946840329
25,71.36287449237025
26,37.84847967428527
27,0.2303686230877523
28,21.54443305440848
29,33.996890006359344
30,28.313580323896165
31,23.22262456923725
32,20.79998272598421
33,12.11190386774618
34,23.03515045268077
35,19.691059064349012
36,5.977267300520829
37,3.7431937143693803
38,3.9916512991437467
39,2.080229013295296
40,1.6346777831332013
41,-1.0626994686708333
42,-1.2919461845332778
43,-1.992878818014338
44,-0.697175772067122
45,-7.332656861773248
46,-2.049683697573815
47,-5.372481831406521
48,-3.0201168106951113
49,-0.34434050570413566
50,1.2352431532453043
51,-4.996265336400906
52,2.352199650194564
53,-0.6007378340234545
54,-1.147391325078494
55,-2.874829450355964
56,6.502298003845161
57,6.504860958753113
58,1.1680182319352206
59,3.7194915716654267
60,4.902182635976309
61,4.581128921123655
62,2.9329670955516365
63,0.6933557526400508
64,-0.04830495822108148
65,-1.6944318226847226
66,2.2234204431425484
67,-3.7713972211167714
68,-17.677228850888046
69,-18.653015110847786
70,-18.626744005547017
71,-6.278244536912221
72,-17.307442450111346
73,-1.0038368695661073
74,-35.95663501034087
75,-66.47326671978807
76,-10.263741334150602
77,0.10227431726584715
78,-39.40487294921039
79,-34.66369292407956
80,-20.666053051813943
81,0.19691158882543155
82,-29.33495976162811
83,-3.8648586567064367
84,-7.959486180708682
85,2.7258963836262726
86,-2.2449684641583882
87,-0.988685132056945
88,-9.421273469055095
89,-3.8497131201268022
90,-3.107573306374102
91,-1.7775196593989966
92,-3.995601719522443
93,2.5595976012321167
94,0.0239810350008681
95,-0.04246647710039686
96,1.0874333402464913
97,1.6712864285678304
98,-0.5532902453162698
99,-1.692207136972914
......@@ -22,7 +22,6 @@ def generated_test_data(zscore, N_to_mask=5000, condition=None):