Commit 32d19200 by hjulienne

### new causal simulation

parent 6d7d87fd
Pipeline #10125 passed with stages
in 4 minutes and 14 seconds
This source diff could not be displayed because it is too large. You can view the blob instead.
 ... ... @@ -6,6 +6,7 @@ import pandas as pd import seaborn as sns import scipy import raiss import os def exponential_decay(nsnp): exp_decay = np.zeros((nsnp, nsnp)) ... ... @@ -21,19 +22,42 @@ def exponential_decay(nsnp): # # # Create a vector of causal effect # Insert one causal SNPs def generate_one_causal(amplitude, LD_cor): def compute_Y(beta, X, sig=1.0): noise= np.random.normal(size=X.shape[0], scale=sig) Y = X.dot(beta) + noise return Y def compute_Zscore(Y, X): Zscores = np.zeros(X.shape[1]) for i in range(X.shape[1]): Zscores[i] = X.iloc[:,i].dot(Y) / X.iloc[:,i].dot(X.iloc[:,i]) Zscores = (Zscores - Zscores.mean()) / np.std(Zscores) return Zscores def generate_one_causal(amplitude, LD_cor, X): nsnp = LD_cor.shape[0] beta = np.random.normal(size=nsnp) beta[nsnp//2] = amplitude Zscore =(beta.dot(LD_cor))/ nsnp Y = compute_Y(beta, X) Zscore = compute_Zscore(Y, X) return({"causal":beta, "Zscore":Zscore}) def generate_two_causal(amplitude, LD_cor): def generate_two_causal(amplitude, LD_cor, X): nsnp = LD_cor.shape[0] beta = np.random.normal(size=nsnp) beta[nsnp//4] = amplitude beta[3*(nsnp//4)] = amplitude Zscore =(beta.dot(LD_cor))/ nsnp Y = compute_Y(beta, X) Zscore = compute_Zscore(Y, X) return({"causal":beta, "Zscore":Zscore}) def generate_two_opposite(amplitude, LD_cor, X): nsnp = LD_cor.shape[0] beta = np.random.normal(size=nsnp) beta[nsnp//4] = amplitude beta[3*(nsnp//4)] = -amplitude Y = compute_Y(beta, X) Zscore = compute_Zscore(Y, X) return({"causal":beta, "Zscore":Zscore}) def get_cor(rd, Zscore, LD_cor, ids_known, ids_masked): ... ... @@ -52,33 +76,33 @@ def best_rd(zscore, LD_cor, ids_known, ids_masked): "rcond":np.argmax(cor_perf)}) if __name__=="__main__": os.chdir("/home/hjulienn/Project/raiss/Simulation/") #get genotype X = pd.read_csv("./data/genotype.csv", sep="\t") nsnp = X.shape[1] nsnp = 100 # Draw causal effect in a normal distribution LD = np.random.uniform(low=0.4, high=1.0, size=(nsnp,nsnp)) M_dec = exponential_decay(nsnp) np.fill_diagonal(LD, 1) LD = (LD + LD.transpose()) / 2 LD_cor = np.multiply(LD,M_dec) # Estimate LD from genotype: LD_cor = X.corr().values n_masked = 40 ids_masked = np.random.choice(100, n_masked, replace=False) ids_known = np.setdiff1d(np.array(range(100)), ids_masked) amplitude_effect = pd.Series(index=np.arange(5,101,5)) amplitude_effect = pd.Series(index=np.arange(0,50,1)) tag = "one_positive" for amp in amplitude_effect.index: print('search of best rd for amplitude {}'.format(amp)) Zscore = generate_two_causal(amp, LD_cor)["Zscore"] signal = generate_one_causal(amp, LD_cor, X) Zscore = signal["Zscore"] beta = signal["causal"] if amp==25: pd.Series(Zscore).to_csv("./results/Zscore_{0}.csv".format(tag)) pd.Series(beta).to_csv("./results/Beta_{0}.csv".format(tag)) res = best_rd(Zscore, LD_cor, ids_known, ids_masked) print("best rd : {}".format(res['rcond'])) print("Correlation : {}".format(res['correlation'])) amplitude_effect.loc[amp] = res['correlation'] amplitude_effect.to_csv("./results/amplitude_effect_two_SNP.csv") LD_cor[ids_known,:] [:,ids_masked] amplitude_effect.to_csv("./results/amplitude_effect_{0}.csv".format(tag))
 library("ggplot2") library("cowplot") library(data.table) setwd("~/Project/raiss/Simulation/") tag = "one_positive" causal = read.csv("./results/Beta_one_positive.csv", header=FALSE) Zscore = read.csv("./results/Zscore_one_positive.csv", header=FALSE) names(causal) =c("SNPid", "Beta") causal["Zscore"] = Zscore[,2] c_long = melt(causal, id.var= "SNPid") head(c_long) p1 = ggplot(c_long, aes(x=SNPid, y=value, color=variable)) + geom_point() + geom_line(size=1) P1 = p1 +scale_colour_manual(values=c("orangered", 'royalblue1'))
This diff is collapsed.
This diff is collapsed.
 source("./constraints.R") library(Matrix) nvar = 100 P = runif(nvar, 0.05, 0.95) cov_mat = matrix(rep(0, nvar^2), ncol=nvar) diag(cov_mat) = P*(1-P) for( i in 1:(nvar-1)) { for(j in (i+1):nvar) { print(paste(i,j)) p1 = P[i] p2 = P[j] p11_range = get_possible_p11(p1,p2) print(p11_range) p11 = p11_range[1] + (p11_range[2] - p11_range[1]) * 0.75 C12 = p11 - p1 * p2 cov_mat[i, j] = C12 cov_mat[j, i] = C12 } } CC <- cov2cor(cov_mat) CC p11_range p11 ## Simulate 10000 x-y pairs, and check that they have the specified ## correlation structure bandSparse(nvar, nvar ,10 ) dec_mat = matrix(rep(0, nvar^2), ncol=nvar) dec <- exp(seq(0,-4, len=nvar)) for(i in 1:nvar) { upb = min(i+10, nvar) dec_mat[i, i:upb] = 1 #dec[1:(nvar-i +1)] } dec_mat upb dec_mat = dec_mat + t(dec_mat) diag(dec_mat) = 1 dec_mat[1:20,1:20] CC_dec =CC*dec_mat range(bincorr2commonprob(P, CC_dec)) x1 <- rmvbin(10000, margprob = P, bincorr = CC_dec) x2 <- rmvbin(10000, margprob = P, bincorr = CC_dec) write.table(x1 + x2, "genotype.csv", sep="\t") write.table(CC_dec, "LD_matrix.csv", sep="\t")
 0,-0.20391320410738456 1,0.5896547542873918 2,0.37144892788629014 3,0.650166489107797 4,-1.131375436310386 5,-0.9259078813459047 6,-0.2921084599419763 7,2.358943196845763 8,0.655846455683395 9,-0.032651495636440306 10,1.7444149724575428 11,-0.17465089932412567 12,0.7455382421872849 13,0.4061550191519106 14,1.169240241340028 15,-0.6997191617580774 16,0.4562797418557831 17,-0.9847662456325408 18,-0.70852865470202 19,1.2532631616315257 20,1.5264408428998801 21,0.6390942980567169 22,-1.3819083221459831 23,-0.6279196193054853 24,2.1044160492820345 25,-1.4106730322153502 26,-0.5596232009058112 27,0.015084115790351602 28,-0.523769971153434 29,0.2504540330303369 30,1.5375623196897161 31,-2.1992111169860635 32,0.6461588859841236 33,-1.1686415494934692 34,-0.368027564860922 35,0.3382711622177748 36,0.9737621871339831 37,-1.6704089787725418 38,0.9418891475554247 39,-0.03532618066933436 40,0.3076837095114559 41,-1.2952275499591008 42,0.8369640288245084 43,1.2399546642780792 44,-0.6993308912776207 45,-0.3801253263788971 46,-0.87234537510688 47,0.10984394854588932 48,0.5312882614592922 49,-0.12075249295635776 50,25.0 51,0.08399744589024126 52,-1.1046283845325797 53,0.5045901114297241 54,-0.7588817903077129 55,-1.7719059545761675 56,1.3512155856771575 57,0.07806179912151762 58,0.3041633372748736 59,-1.2758123124705802 60,-1.4404388172667104 61,1.4149070426057877 62,-3.1326132887602283 63,-0.6868928210506422 64,-2.1601056346493306 65,0.18636884839422 66,-0.9599884510734886 67,0.7553418701928796 68,-0.5801832181632969 69,0.7052561842516794 70,0.12383211677909362 71,-0.4835840816652854 72,-1.864494918674018 73,1.8677344551470052 74,-1.9008480778383947 75,0.6349959721978555 76,-1.4977901654729286 77,-0.39961356443361107 78,-0.8642360822301626 79,0.4156180728721907 80,0.8785800518453744 81,-0.26231877969107925 82,-0.37219707167695176 83,0.7447627076891659 84,-0.11710847466322802 85,0.7096945389518131 86,-0.8741936106047762 87,-1.4105819311420367 88,-0.5322861324525582 89,-0.07324981118981488 90,-0.3131723037475416 91,-0.6591520692639076 92,-0.212193394502751 93,0.1983576597482148 94,0.4553584120853499 95,0.7409328660513057 96,-0.6354978719313896 97,1.525947470875792 98,0.3588239775668098 99,1.234744442878398
 0,-0.2833015557539734 1,1.1137193487298573 2,-0.5751020136949129 3,0.25092718459231605 4,-0.9231607328579114 5,-0.5282440465750906 6,0.8966397340387556 7,1.025137710975423 8,2.1545806809282317 9,0.7405209368866464 10,0.6835216263324101 11,0.6103050930728369 12,-0.1334374102833025 13,1.4198265246470219 14,-0.025425675136311564 15,-0.44907078365314856 16,-1.5468635674187794 17,-0.5331544970182626 18,0.2194942000732836 19,0.6472502923604606 20,0.05283022559676286 21,1.3045418778933164 22,-0.9714238416084638 23,0.5559887318273533 24,-0.7992569512233562 25,25.0 26,-0.21746831361418983 27,0.5311847032983822 28,0.40559197474537845 29,0.40004653031636067 30,0.6153284385690463 31,1.363194338296307 32,0.7987204228748891 33,0.6467852406053607 34,-1.5592575756448548 35,0.7061978684204476 36,-0.9446771100766019 37,1.6410623532318769 38,-1.3256669039968385 39,-0.4967990656260579 40,2.7105706932313183 41,-0.17613604065618832 42,-0.43625291106654057 43,0.4013655693053687 44,1.6598664756978174 45,0.9436156498836157 46,-0.3610573422939318 47,2.1240370571649465 48,0.3654456729669973 49,0.4171198578995682 50,0.1747094226609475 51,0.6345954372982099 52,-1.5657077531204562 53,0.8099454296078114 54,-1.3815475193464855 55,0.26020936637231856 56,-0.20284542049627843 57,-0.33906719397517926 58,-0.29419980048222344 59,-0.10678005663493949 60,-1.453940962144116 61,0.10148585215590376 62,-0.5973987733455278 63,-0.269209456269701 64,-1.4151314063321025 65,0.2842124751754514 66,0.5012142925546401 67,-1.177719555232783 68,0.7407081562232875 69,-0.10993602398780414 70,1.8539450877599215 71,1.6152160006055396 72,-0.8154731745973266 73,-0.2744559659603909 74,0.7560920125868593 75,-25.0 76,-1.9150153175861253 77,0.13397210387328198 78,0.017013103365785716 79,0.11670783527577634 80,0.681431453023927 81,0.2766025397593883 82,-0.31498643217328987 83,1.1419887748265651 84,0.975576757573083 85,0.43530209027570643 86,-0.042724153068193445 87,-0.30743917100881446 88,-1.2587673887159139 89,-1.2110997994562274 90,0.16308896184714816 91,-0.1727101897923054 92,-0.7604085037485097 93,0.09583220326441612 94,0.614459554724203 95,0.3051399492047042 96,-2.757893113161325 97,-0.0998303819669334 98,-0.06691073630522072 99,0.5789726272553339
 0,1.8025575236210356 1,-0.5552270142696769 2,-0.4285639509661904 3,1.274804918459334 4,-0.34556049972408365 5,-0.943434906397131 6,-1.0006945589173073 7,1.6998794359108056 8,0.5794922020649045 9,0.3420347003400587 10,-1.1715776275618084 11,0.5131026059188307 12,-0.6685096379131924 13,-1.0000833404957266 14,1.277126489467953 15,-0.5806935173532369 16,-0.8328820947015481 17,-0.8826594496252301 18,0.841649072259141 19,-1.270587832100099 20,2.0967557281433007 21,-0.4873503766326562 22,1.3998862941791128 23,-0.7921499953043597 24,-1.3430248545806363 25,25.0 26,-0.8846193002442505 27,-0.21288221427235451 28,0.014392100161651485 29,-0.27055119878033135 30,-0.49235482768922934 31,-1.1544762602950638 32,-1.5613534606095019 33,-1.019053405789182 34,0.09578935284866708 35,0.4945653567017216 36,0.6960755202299728 37,0.21423772718673748 38,-1.2507737758391195 39,-0.2906763245368055 40,1.4661744730391222 41,-0.25977768219026887 42,-1.1060381796921626 43,0.5219011419277638 44,2.5247693005098775 45,-0.4407464012643449 46,-1.7629667202066674 47,0.3700089001386741 48,-0.5559860603489318 49,-0.6762478416106756 50,-0.2564100067387401 51,-0.934655005717179 52,0.46919113315844896 53,1.1794780014177042 54,0.47294837262751793 55,-0.7274191345827122 56,-0.8153347926859632 57,-1.073629780568152 58,0.09241045394991158 59,2.1520071696277214 60,-0.22467377685556852 61,-0.39592006777495803 62,0.055202893571084086 63,1.1760282132869206 64,1.0159331329693608 65,-0.8845658093370724 66,-0.8620435728803201 67,-2.036158860260977 68,0.14177664667461168 69,0.9816052381099066 70,1.4663518296259797 71,1.5912393132467109 72,-0.4687856607558591 73,-0.588175181845735 74,1.3488192909939074 75,25.0 76,0.9812475857172418 77,0.36927149387068453 78,-0.3940106011338063 79,1.10334146795949 80,-0.37571253094014795 81,-0.13176683082393542 82,1.2258718084360876 83,-1.432279357474846 84,-0.11524279602402654 85,-0.9140386887571811 86,0.8768226449110638 87,-0.0718466665982749 88,-1.4630020030377726 89,1.320867657314122 90,-0.2435636779113219 91,-0.05069104193553215 92,-0.048931110380280564 93,0.6305758146742046 94,-1.272890772117725 95,0.5193153805949056 96,-1.992678619330077 97,-0.6796150090568936 98,0.34732678471750633 99,-0.39558576822842995
 0,0.5498947491964646 1,1.299165898760266 2,-0.8710414809780318 3,-0.30934167473045254 4,-1.0462903393885612 5,0.8930168256463261 6,1.1637741049931092 7,-0.4184661839089072 8,0.2893504668501217 9,-0.6770301243837661 10,-0.29813913438066947 11,-0.05792241149213983 12,-0.532612242597702 13,1.3316414421427039 14,-1.0865629857225259 15,-1.0527352532322805 16,0.6351883319354122 17,-0.9820077353349201 18,1.4323183530854509 19,1.7300415057806677 20,1.217265480918679 21,0.7911198976898782 22,-0.5441923809051735 23,-0.1753597894498861 24,-0.5118480283486864 25,0.2815528458336556 26,0.8349431689914809 27,-1.3437251279260438 28,-0.620630426774919 29,0.058308674742086966 30,0.03482321119270283 31,0.3384926244963026 32,-0.2985774082730817 33,-0.6368336540544478 34,0.10079760216154904 35,-0.07148649997760155 36,-0.5577607207012858 37,0.1742416204823311 38,1.2153482853159738 39,-0.24221025040566313 40,0.9955915636808921 41,0.17093510781718468 42,-1.1455153351076373 43,2.2840701298850523 44,0.3927828504534482 45,-0.37433992265876664 46,2.9723497561344514 47,2.396987540607526 48,1.4653725976979606 49,2.017789299278274 50,2.6864874612404934 51,-1.18403766195079 52,0.7301144760674169 53,-0.16011660966941504 54,-1.0849169237718879 55,-1.038058660563962 56,0.8673529892688497 57,1.2090825832895233 58,2.2094646358476253 59,0.4781396643294638 60,0.09952322346093803 61,0.24093518535208366 62,-0.29500693294521824 63,-1.3657008723969175 64,0.653537314736317 65,-1.0189448541234165 66,-1.3443526168152362 67,-0.8343629194589929 68,-0.30988956268865053 69,0.45103424883890153 70,0.48655487475084835 71,-1.0073315383952324 72,-0.8835446767339747 73,-1.111212682285474 74,-0.2809847289921364 75,0.3308284280258275 76,-0.890433572568488 77,-1.3295998961896511 78,0.11448676230415548 79,0.5742545881644717 80,1.394611197004362 81,-1.2332524416981983 82,0.30867758896643394 83,-0.9185475786583082 84,-0.7434567607263163 85,-1.2494595757290008 86,-0.5472432575383596 87,-0.8673752704175148 88,-0.3783727798266302 89,-0.3846470624434598 90,-0.868723936768242 91,-0.3935268169948425 92,0.21009420654750527 93,-0.49941870805766037 94,-0.8221409950280454 95,-1.023869339673185 96,0.6490823714139528 97,-1.1530384278151768 98,-1.1288173413116787 99,-0.5564116224098041
 0,0.2702827386861409 1,0.7563338364628803 2,-0.3541994941983539 3,-0.13577934805710293 4,-0.3983451939851993 5,0.3069583373361758 6,0.7077697157326791 7,-0.20603713107885469 8,0.30086192558609964 9,-0.2549032703134758 10,-0.09888197549737025 11,0.09929025136179447 12,-0.2304264983301821 13,0.921202717271288 14,-0.4295815907640337 15,-0.39540274461378083 16,1.2139015398081703 17,-0.3732517102928621 18,2.0486785150086435 19,2.2943686434879433 20,2.0706178974794796 21,2.053675084740378 22,0.04203509359581056 23,0.7577540303973258 24,0.007799679921154333 25,3.7920336822779195 26,3.144907024210308 27,-0.47872129056587426 28,0.18309438937319922 29,1.6494967601597732 30,1.1095318653145463 31,2.133228490757363 32,0.49634830745334685 33,0.08384543334591689 34,1.119695615117088 35,0.8933253774671751 36,-0.16756812023413736 37,0.6179365316264301 38,1.0380731160283123 39,0.17632931839176258 40,0.5185312388663046 41,-0.012936736689084183 42,-0.448160320534273 43,0.6350632300808939 44,0.04949304805763625 45,-0.2432433769662173 46,0.6249858642899504 47,0.6378324850007271 48,0.11748128998251547 49,0.22769161433488594 50,-0.09357371888223588 51,-0.4689189671828656 52,-0.19975632460237797 53,-0.2423900870020736 54,-0.46665854632291665 55,-0.4147627139539826 56,-0.08037838869794499 57,0.1294563939194988 58,0.18008238721134304 59,-0.272417099340699 60,-0.3910002606170435 61,-0.14394383763819776 62,-0.23097788454609886 63,-0.49466561095255035 64,-0.03367094018390784 65,-0.37823017245840224 66,-0.46429573007963243 67,-0.4636254796424125 68,-0.9437093439415877 69,-1.4486710790972712 70,-1.2625949613347036 71,-0.3639555777448285 72,-0.5758778795245879 73,-0.39091630882825934 74,-1.4592141346660912 75,-3.666536400905963 76,-0.49446037809927185 77,-0.474168608815242 78,-2.034507828768904 79,-1.72464760476325 80,-1.8232095324001871 81,-0.40715068454419845 82,-1.5662393657867584 83,-0.37065090473595114 84,-0.39972568539550746 85,-0.4213817531365571 86,-0.2783008189352365 87,-0.3326800473218013 88,-0.39642183234812395 89,-0.32959639600890744 90,-0.3111062543245556 91,-0.2362374031733925 92,-0.36995830865420926 93,-0.20687695205811935 94,-0.31737136619679357 95,-0.38658864079411875 96,-0.24701656790929832 97,-0.41389003319426515 98,-0.4205225383142136 99,-0.2691037141965269
 0,0.20330526070816188 1,0.38761753143515965 2,-0.8067079547964761 3,-0.4760318838707589 4,-0.8685987446962649 5,0.1724557168737688 6,0.22367497575720122 7,-0.5885922525256934 8,-0.0918646964906696 9,-0.7019200206353852 10,-0.5728169753044996 11,-0.35533633962233035 12,-0.6404234792625831 13,0.36716996070203867 14,-0.9103454316694869 15,-0.9174459402277044 16,0.6356413922293473 17,-0.8626736909919648 18,1.6944254894528852 19,1.5786871588720628 20,1.4357425999919904 21,1.2147480165947735 22,-0.3736867861588647 23,-0.020652171679399662 24,-0.5190591649906469 25,2.7000324786738528 26,2.2118013298682446 27,-1.0148225250637415 28,-0.33909766033086297 29,0.8601813561894409 30,0.29630884577005623 31,1.0561809289831288 32,-0.25244327325525545 33,-0.507171353548104 34,0.5883324929865971 35,0.26738309791216613 36,-0.6272539173734938 37,0.043802694447453876 38,0.46639844649763684 39,-0.34149112032696916 40,-0.0779400364137079 41,-0.47123294328110304