Commit 32d19200 authored by hjulienne's avatar hjulienne

new causal simulation

parent 6d7d87fd
Pipeline #10125 passed with stages
in 4 minutes and 14 seconds
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,6 +6,7 @@ import pandas as pd
import seaborn as sns
import scipy
import raiss
import os
def exponential_decay(nsnp):
exp_decay = np.zeros((nsnp, nsnp))
......@@ -21,19 +22,42 @@ def exponential_decay(nsnp):
# # # Create a vector of causal effect
# Insert one causal SNPs
def generate_one_causal(amplitude, LD_cor):
def compute_Y(beta, X, sig=1.0):
noise= np.random.normal(size=X.shape[0], scale=sig)
Y = X.dot(beta) + noise
return Y
def compute_Zscore(Y, X):
Zscores = np.zeros(X.shape[1])
for i in range(X.shape[1]):
Zscores[i] = X.iloc[:,i].dot(Y) / X.iloc[:,i].dot(X.iloc[:,i])
Zscores = (Zscores - Zscores.mean()) / np.std(Zscores)
return Zscores
def generate_one_causal(amplitude, LD_cor, X):
nsnp = LD_cor.shape[0]
beta = np.random.normal(size=nsnp)
beta[nsnp//2] = amplitude
Zscore =(beta.dot(LD_cor))/ nsnp
Y = compute_Y(beta, X)
Zscore = compute_Zscore(Y, X)
return({"causal":beta, "Zscore":Zscore})
def generate_two_causal(amplitude, LD_cor):
def generate_two_causal(amplitude, LD_cor, X):
nsnp = LD_cor.shape[0]
beta = np.random.normal(size=nsnp)
beta[nsnp//4] = amplitude
beta[3*(nsnp//4)] = amplitude
Zscore =(beta.dot(LD_cor))/ nsnp
Y = compute_Y(beta, X)
Zscore = compute_Zscore(Y, X)
return({"causal":beta, "Zscore":Zscore})
def generate_two_opposite(amplitude, LD_cor, X):
nsnp = LD_cor.shape[0]
beta = np.random.normal(size=nsnp)
beta[nsnp//4] = amplitude
beta[3*(nsnp//4)] = -amplitude
Y = compute_Y(beta, X)
Zscore = compute_Zscore(Y, X)
return({"causal":beta, "Zscore":Zscore})
def get_cor(rd, Zscore, LD_cor, ids_known, ids_masked):
......@@ -52,33 +76,33 @@ def best_rd(zscore, LD_cor, ids_known, ids_masked):
"rcond":np.argmax(cor_perf)})
if __name__=="__main__":
os.chdir("/home/hjulienn/Project/raiss/Simulation/")
#get genotype
X = pd.read_csv("./data/genotype.csv", sep="\t")
nsnp = X.shape[1]
nsnp = 100
# Draw causal effect in a normal distribution
LD = np.random.uniform(low=0.4, high=1.0, size=(nsnp,nsnp))
M_dec = exponential_decay(nsnp)
np.fill_diagonal(LD, 1)
LD = (LD + LD.transpose()) / 2
LD_cor = np.multiply(LD,M_dec)
# Estimate LD from genotype:
LD_cor = X.corr().values
n_masked = 40
ids_masked = np.random.choice(100, n_masked, replace=False)
ids_known = np.setdiff1d(np.array(range(100)), ids_masked)
amplitude_effect = pd.Series(index=np.arange(5,101,5))
amplitude_effect = pd.Series(index=np.arange(0,50,1))
tag = "one_positive"
for amp in amplitude_effect.index:
print('search of best rd for amplitude {}'.format(amp))
Zscore = generate_two_causal(amp, LD_cor)["Zscore"]
signal = generate_one_causal(amp, LD_cor, X)
Zscore = signal["Zscore"]
beta = signal["causal"]
if amp==25:
pd.Series(Zscore).to_csv("./results/Zscore_{0}.csv".format(tag))
pd.Series(beta).to_csv("./results/Beta_{0}.csv".format(tag))
res = best_rd(Zscore, LD_cor, ids_known, ids_masked)
print("best rd : {}".format(res['rcond']))
print("Correlation : {}".format(res['correlation']))
amplitude_effect.loc[amp] = res['correlation']
amplitude_effect.to_csv("./results/amplitude_effect_two_SNP.csv")
LD_cor[ids_known,:] [:,ids_masked]
amplitude_effect.to_csv("./results/amplitude_effect_{0}.csv".format(tag))
library("ggplot2")
library("cowplot")
library(data.table)
setwd("~/Project/raiss/Simulation/")
tag = "one_positive"
causal = read.csv("./results/Beta_one_positive.csv", header=FALSE)
Zscore = read.csv("./results/Zscore_one_positive.csv", header=FALSE)
names(causal) =c("SNPid", "Beta")
causal["Zscore"] = Zscore[,2]
c_long = melt(causal, id.var= "SNPid")
head(c_long)
p1 = ggplot(c_long, aes(x=SNPid, y=value, color=variable)) + geom_point() + geom_line(size=1)
P1 = p1 +scale_colour_manual(values=c("orangered", 'royalblue1'))
This diff is collapsed.
This diff is collapsed.
source("./constraints.R")
library(Matrix)
nvar = 100
P = runif(nvar, 0.05, 0.95)
cov_mat = matrix(rep(0, nvar^2), ncol=nvar)
diag(cov_mat) = P*(1-P)
for( i in 1:(nvar-1))
{
for(j in (i+1):nvar)
{
print(paste(i,j))
p1 = P[i]
p2 = P[j]
p11_range = get_possible_p11(p1,p2)
print(p11_range)
p11 = p11_range[1] + (p11_range[2] - p11_range[1]) * 0.75
C12 = p11 - p1 * p2
cov_mat[i, j] = C12
cov_mat[j, i] = C12
}
}
CC <- cov2cor(cov_mat)
CC
p11_range
p11
## Simulate 10000 x-y pairs, and check that they have the specified
## correlation structure
bandSparse(nvar, nvar ,10 )
dec_mat = matrix(rep(0, nvar^2), ncol=nvar)
dec <- exp(seq(0,-4, len=nvar))
for(i in 1:nvar)
{
upb = min(i+10, nvar)
dec_mat[i, i:upb] = 1 #dec[1:(nvar-i +1)]
}
dec_mat
upb
dec_mat = dec_mat + t(dec_mat)
diag(dec_mat) = 1
dec_mat[1:20,1:20]
CC_dec =CC*dec_mat
range(bincorr2commonprob(P, CC_dec))
x1 <- rmvbin(10000, margprob = P, bincorr = CC_dec)
x2 <- rmvbin(10000, margprob = P, bincorr = CC_dec)
write.table(x1 + x2, "genotype.csv", sep="\t")
write.table(CC_dec, "LD_matrix.csv", sep="\t")
0,-0.20391320410738456
1,0.5896547542873918
2,0.37144892788629014
3,0.650166489107797
4,-1.131375436310386
5,-0.9259078813459047
6,-0.2921084599419763
7,2.358943196845763
8,0.655846455683395
9,-0.032651495636440306
10,1.7444149724575428
11,-0.17465089932412567
12,0.7455382421872849
13,0.4061550191519106
14,1.169240241340028
15,-0.6997191617580774
16,0.4562797418557831
17,-0.9847662456325408
18,-0.70852865470202
19,1.2532631616315257
20,1.5264408428998801
21,0.6390942980567169
22,-1.3819083221459831
23,-0.6279196193054853
24,2.1044160492820345
25,-1.4106730322153502
26,-0.5596232009058112
27,0.015084115790351602
28,-0.523769971153434
29,0.2504540330303369
30,1.5375623196897161
31,-2.1992111169860635
32,0.6461588859841236
33,-1.1686415494934692
34,-0.368027564860922
35,0.3382711622177748
36,0.9737621871339831
37,-1.6704089787725418
38,0.9418891475554247
39,-0.03532618066933436
40,0.3076837095114559
41,-1.2952275499591008
42,0.8369640288245084
43,1.2399546642780792
44,-0.6993308912776207
45,-0.3801253263788971
46,-0.87234537510688
47,0.10984394854588932
48,0.5312882614592922
49,-0.12075249295635776
50,25.0
51,0.08399744589024126
52,-1.1046283845325797
53,0.5045901114297241
54,-0.7588817903077129
55,-1.7719059545761675
56,1.3512155856771575
57,0.07806179912151762
58,0.3041633372748736
59,-1.2758123124705802
60,-1.4404388172667104
61,1.4149070426057877
62,-3.1326132887602283
63,-0.6868928210506422
64,-2.1601056346493306
65,0.18636884839422
66,-0.9599884510734886
67,0.7553418701928796
68,-0.5801832181632969
69,0.7052561842516794
70,0.12383211677909362
71,-0.4835840816652854
72,-1.864494918674018
73,1.8677344551470052
74,-1.9008480778383947
75,0.6349959721978555
76,-1.4977901654729286
77,-0.39961356443361107
78,-0.8642360822301626
79,0.4156180728721907
80,0.8785800518453744
81,-0.26231877969107925
82,-0.37219707167695176
83,0.7447627076891659
84,-0.11710847466322802
85,0.7096945389518131
86,-0.8741936106047762
87,-1.4105819311420367
88,-0.5322861324525582
89,-0.07324981118981488
90,-0.3131723037475416
91,-0.6591520692639076
92,-0.212193394502751
93,0.1983576597482148
94,0.4553584120853499
95,0.7409328660513057
96,-0.6354978719313896
97,1.525947470875792
98,0.3588239775668098
99,1.234744442878398
0,-0.2833015557539734
1,1.1137193487298573
2,-0.5751020136949129
3,0.25092718459231605
4,-0.9231607328579114
5,-0.5282440465750906
6,0.8966397340387556
7,1.025137710975423
8,2.1545806809282317
9,0.7405209368866464
10,0.6835216263324101
11,0.6103050930728369
12,-0.1334374102833025
13,1.4198265246470219
14,-0.025425675136311564
15,-0.44907078365314856
16,-1.5468635674187794
17,-0.5331544970182626
18,0.2194942000732836
19,0.6472502923604606
20,0.05283022559676286
21,1.3045418778933164
22,-0.9714238416084638
23,0.5559887318273533
24,-0.7992569512233562
25,25.0
26,-0.21746831361418983
27,0.5311847032983822
28,0.40559197474537845
29,0.40004653031636067
30,0.6153284385690463
31,1.363194338296307
32,0.7987204228748891
33,0.6467852406053607
34,-1.5592575756448548
35,0.7061978684204476
36,-0.9446771100766019
37,1.6410623532318769
38,-1.3256669039968385
39,-0.4967990656260579
40,2.7105706932313183
41,-0.17613604065618832
42,-0.43625291106654057
43,0.4013655693053687
44,1.6598664756978174
45,0.9436156498836157
46,-0.3610573422939318
47,2.1240370571649465
48,0.3654456729669973
49,0.4171198578995682
50,0.1747094226609475
51,0.6345954372982099
52,-1.5657077531204562
53,0.8099454296078114
54,-1.3815475193464855
55,0.26020936637231856
56,-0.20284542049627843
57,-0.33906719397517926
58,-0.29419980048222344
59,-0.10678005663493949
60,-1.453940962144116
61,0.10148585215590376
62,-0.5973987733455278
63,-0.269209456269701
64,-1.4151314063321025
65,0.2842124751754514
66,0.5012142925546401
67,-1.177719555232783
68,0.7407081562232875
69,-0.10993602398780414
70,1.8539450877599215
71,1.6152160006055396
72,-0.8154731745973266
73,-0.2744559659603909
74,0.7560920125868593
75,-25.0
76,-1.9150153175861253
77,0.13397210387328198
78,0.017013103365785716
79,0.11670783527577634
80,0.681431453023927
81,0.2766025397593883
82,-0.31498643217328987
83,1.1419887748265651
84,0.975576757573083
85,0.43530209027570643
86,-0.042724153068193445
87,-0.30743917100881446
88,-1.2587673887159139
89,-1.2110997994562274
90,0.16308896184714816
91,-0.1727101897923054
92,-0.7604085037485097
93,0.09583220326441612
94,0.614459554724203
95,0.3051399492047042
96,-2.757893113161325
97,-0.0998303819669334
98,-0.06691073630522072
99,0.5789726272553339
0,1.8025575236210356
1,-0.5552270142696769
2,-0.4285639509661904
3,1.274804918459334
4,-0.34556049972408365
5,-0.943434906397131
6,-1.0006945589173073
7,1.6998794359108056
8,0.5794922020649045
9,0.3420347003400587
10,-1.1715776275618084
11,0.5131026059188307
12,-0.6685096379131924
13,-1.0000833404957266
14,1.277126489467953
15,-0.5806935173532369
16,-0.8328820947015481
17,-0.8826594496252301
18,0.841649072259141
19,-1.270587832100099
20,2.0967557281433007
21,-0.4873503766326562
22,1.3998862941791128
23,-0.7921499953043597
24,-1.3430248545806363
25,25.0
26,-0.8846193002442505
27,-0.21288221427235451
28,0.014392100161651485
29,-0.27055119878033135
30,-0.49235482768922934
31,-1.1544762602950638
32,-1.5613534606095019
33,-1.019053405789182
34,0.09578935284866708
35,0.4945653567017216
36,0.6960755202299728
37,0.21423772718673748
38,-1.2507737758391195
39,-0.2906763245368055
40,1.4661744730391222
41,-0.25977768219026887
42,-1.1060381796921626
43,0.5219011419277638
44,2.5247693005098775
45,-0.4407464012643449
46,-1.7629667202066674
47,0.3700089001386741
48,-0.5559860603489318
49,-0.6762478416106756
50,-0.2564100067387401
51,-0.934655005717179
52,0.46919113315844896
53,1.1794780014177042
54,0.47294837262751793
55,-0.7274191345827122
56,-0.8153347926859632
57,-1.073629780568152
58,0.09241045394991158
59,2.1520071696277214
60,-0.22467377685556852
61,-0.39592006777495803
62,0.055202893571084086
63,1.1760282132869206
64,1.0159331329693608
65,-0.8845658093370724
66,-0.8620435728803201
67,-2.036158860260977
68,0.14177664667461168
69,0.9816052381099066
70,1.4663518296259797
71,1.5912393132467109
72,-0.4687856607558591
73,-0.588175181845735
74,1.3488192909939074
75,25.0
76,0.9812475857172418
77,0.36927149387068453
78,-0.3940106011338063
79,1.10334146795949
80,-0.37571253094014795
81,-0.13176683082393542
82,1.2258718084360876
83,-1.432279357474846
84,-0.11524279602402654
85,-0.9140386887571811
86,0.8768226449110638
87,-0.0718466665982749
88,-1.4630020030377726
89,1.320867657314122
90,-0.2435636779113219
91,-0.05069104193553215
92,-0.048931110380280564
93,0.6305758146742046
94,-1.272890772117725
95,0.5193153805949056
96,-1.992678619330077
97,-0.6796150090568936
98,0.34732678471750633
99,-0.39558576822842995
0,0.5498947491964646
1,1.299165898760266
2,-0.8710414809780318
3,-0.30934167473045254
4,-1.0462903393885612
5,0.8930168256463261
6,1.1637741049931092
7,-0.4184661839089072
8,0.2893504668501217
9,-0.6770301243837661
10,-0.29813913438066947
11,-0.05792241149213983
12,-0.532612242597702
13,1.3316414421427039
14,-1.0865629857225259
15,-1.0527352532322805
16,0.6351883319354122
17,-0.9820077353349201
18,1.4323183530854509
19,1.7300415057806677
20,1.217265480918679
21,0.7911198976898782
22,-0.5441923809051735
23,-0.1753597894498861
24,-0.5118480283486864
25,0.2815528458336556
26,0.8349431689914809
27,-1.3437251279260438
28,-0.620630426774919
29,0.058308674742086966
30,0.03482321119270283
31,0.3384926244963026
32,-0.2985774082730817
33,-0.6368336540544478
34,0.10079760216154904
35,-0.07148649997760155
36,-0.5577607207012858
37,0.1742416204823311
38,1.2153482853159738
39,-0.24221025040566313
40,0.9955915636808921
41,0.17093510781718468
42,-1.1455153351076373
43,2.2840701298850523
44,0.3927828504534482
45,-0.37433992265876664
46,2.9723497561344514
47,2.396987540607526
48,1.4653725976979606
49,2.017789299278274
50,2.6864874612404934
51,-1.18403766195079
52,0.7301144760674169
53,-0.16011660966941504
54,-1.0849169237718879
55,-1.038058660563962
56,0.8673529892688497
57,1.2090825832895233
58,2.2094646358476253
59,0.4781396643294638
60,0.09952322346093803
61,0.24093518535208366
62,-0.29500693294521824
63,-1.3657008723969175
64,0.653537314736317
65,-1.0189448541234165
66,-1.3443526168152362
67,-0.8343629194589929
68,-0.30988956268865053
69,0.45103424883890153
70,0.48655487475084835
71,-1.0073315383952324
72,-0.8835446767339747
73,-1.111212682285474
74,-0.2809847289921364
75,0.3308284280258275
76,-0.890433572568488
77,-1.3295998961896511
78,0.11448676230415548
79,0.5742545881644717
80,1.394611197004362
81,-1.2332524416981983
82,0.30867758896643394
83,-0.9185475786583082
84,-0.7434567607263163
85,-1.2494595757290008
86,-0.5472432575383596
87,-0.8673752704175148
88,-0.3783727798266302
89,-0.3846470624434598
90,-0.868723936768242
91,-0.3935268169948425
92,0.21009420654750527
93,-0.49941870805766037
94,-0.8221409950280454
95,-1.023869339673185
96,0.6490823714139528
97,-1.1530384278151768
98,-1.1288173413116787
99,-0.5564116224098041
0,0.2702827386861409
1,0.7563338364628803
2,-0.3541994941983539
3,-0.13577934805710293
4,-0.3983451939851993
5,0.3069583373361758
6,0.7077697157326791
7,-0.20603713107885469
8,0.30086192558609964
9,-0.2549032703134758
10,-0.09888197549737025
11,0.09929025136179447
12,-0.2304264983301821
13,0.921202717271288
14,-0.4295815907640337
15,-0.39540274461378083
16,1.2139015398081703
17,-0.3732517102928621
18,2.0486785150086435
19,2.2943686434879433
20,2.0706178974794796
21,2.053675084740378
22,0.04203509359581056
23,0.7577540303973258
24,0.007799679921154333
25,3.7920336822779195
26,3.144907024210308
27,-0.47872129056587426
28,0.18309438937319922
29,1.6494967601597732
30,1.1095318653145463
31,2.133228490757363
32,0.49634830745334685
33,0.08384543334591689
34,1.119695615117088
35,0.8933253774671751
36,-0.16756812023413736
37,0.6179365316264301
38,1.0380731160283123
39,0.17632931839176258
40,0.5185312388663046
41,-0.012936736689084183
42,-0.448160320534273
43,0.6350632300808939
44,0.04949304805763625
45,-0.2432433769662173
46,0.6249858642899504
47,0.6378324850007271
48,0.11748128998251547
49,0.22769161433488594
50,-0.09357371888223588
51,-0.4689189671828656
52,-0.19975632460237797
53,-0.2423900870020736
54,-0.46665854632291665
55,-0.4147627139539826
56,-0.08037838869794499
57,0.1294563939194988
58,0.18008238721134304
59,-0.272417099340699
60,-0.3910002606170435
61,-0.14394383763819776
62,-0.23097788454609886
63,-0.49466561095255035
64,-0.03367094018390784
65,-0.37823017245840224
66,-0.46429573007963243
67,-0.4636254796424125
68,-0.9437093439415877
69,-1.4486710790972712
70,-1.2625949613347036
71,-0.3639555777448285
72,-0.5758778795245879
73,-0.39091630882825934
74,-1.4592141346660912
75,-3.666536400905963
76,-0.49446037809927185
77,-0.474168608815242
78,-2.034507828768904
79,-1.72464760476325
80,-1.8232095324001871
81,-0.40715068454419845
82,-1.5662393657867584
83,-0.37065090473595114
84,-0.39972568539550746
85,-0.4213817531365571
86,-0.2783008189352365
87,-0.3326800473218013
88,-0.39642183234812395
89,-0.32959639600890744
90,-0.3111062543245556
91,-0.2362374031733925
92,-0.36995830865420926
93,-0.20687695205811935
94,-0.31737136619679357
95,-0.38658864079411875
96,-0.24701656790929832
97,-0.41389003319426515
98,-0.4205225383142136
99,-0.2691037141965269
0,0.20330526070816188
1,0.38761753143515965
2,-0.8067079547964761
3,-0.4760318838707589
4,-0.8685987446962649
5,0.1724557168737688
6,0.22367497575720122
7,-0.5885922525256934
8,-0.0918646964906696
9,-0.7019200206353852
10,-0.5728169753044996
11,-0.35533633962233035
12,-0.6404234792625831
13,0.36716996070203867
14,-0.9103454316694869
15,-0.9174459402277044
16,0.6356413922293473
17,-0.8626736909919648
18,1.6944254894528852
19,1.5786871588720628
20,1.4357425999919904
21,1.2147480165947735
22,-0.3736867861588647
23,-0.020652171679399662
24,-0.5190591649906469
25,2.7000324786738528
26,2.2118013298682446
27,-1.0148225250637415
28,-0.33909766033086297
29,0.8601813561894409
30,0.29630884577005623
31,1.0561809289831288
32,-0.25244327325525545
33,-0.507171353548104
34,0.5883324929865971
35,0.26738309791216613
36,-0.6272539173734938
37,0.043802694447453876
38,0.46639844649763684
39,-0.34149112032696916
40,-0.0779400364137079
41,-0.47123294328110304