From 011144961db54bc378b0c5daabd4f83a7177bb5c Mon Sep 17 00:00:00 2001 From: vlaville <vincent.laville@pasteur.fr> Date: Tue, 9 Jul 2019 17:57:55 +0200 Subject: [PATCH] README --- .gitignore | 2 + EXAMPLE/example_input.txt | 11 ++++++ EXAMPLE/example_output.txt.gz | Bin 0 -> 907 bytes HELP.md | 68 ---------------------------------- README.md | 32 +++++++++++++++- example_input.txt | 11 ++++++ example_output.txt.gz | Bin 0 -> 907 bytes j2s.py | 6 +-- 8 files changed, 57 insertions(+), 73 deletions(-) create mode 100644 .gitignore create mode 100644 EXAMPLE/example_input.txt create mode 100644 EXAMPLE/example_output.txt.gz delete mode 100644 HELP.md create mode 100644 example_input.txt create mode 100644 example_output.txt.gz diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..13486c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +j2s_old_output.py + diff --git a/EXAMPLE/example_input.txt b/EXAMPLE/example_input.txt new file mode 100644 index 0000000..c9c03b6 --- /dev/null +++ b/EXAMPLE/example_input.txt @@ -0,0 +1,11 @@ +MarkerName Effect StdErr IntEffect IntStdErr IntCov N +rs1 0.06464 0.09852999999999999 0.03685 0.1539 -0.009603 302478 +rs2 0.1482 0.0439 0.1071 0.05265 -0.0019219999999999999 551772 +rs3 0.2428 0.06389 0.1027 0.07647999999999999 -0.004072999999999999 537523 +rs4 -0.1821 0.05173 -0.06716 0.06105 -0.002671 540537 +rs5 -0.1665 0.04127 -0.055060000000000005 0.0498 -0.0016920000000000001 527550 +rs6 0.09873 0.0404 -0.0314 0.04846 -0.001627 532076 +rs7 0.2008 0.042910000000000004 -0.1251 0.05058 -0.001835 541066 +rs8 -0.044430000000000004 0.07288 0.07737999999999999 0.0825 -0.005298 391711 +rs9 -0.23199999999999998 0.1064 -0.1964 0.1246 -0.01128 508288 +rs10 0.2414 0.06343 -0.1089 0.07171 -0.004017 401959 diff --git a/EXAMPLE/example_output.txt.gz b/EXAMPLE/example_output.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085 GIT binary patch literal 907 
zcmV;619bc!iwFoRpd?)a|7Cb#ZE$R5UvG7EaCLMpba-?CR8&iDR6z_}ckYsr|DOY} zBs<n95)&2(6on1Pr`-L5LPwf6d1JTB<tl%Ee*1BMdwTwPU!VTGe!aiEyg$GG{<goq z?$4+7<=6cR`1fr>&Ofi$^J#s4e*3<o-oH%H{{DI%PY-XuWzkLCr9Yg+JCYVN^)e+T zG07!2w-#v*%PjJ&`y<y9_13x(lGtkQBh#J-QR0GV!VRiq(o5M(^#r$e2W$B*SIjCF zbA3_v@*5XZN2t5wrJ|TDcg_^xRwSwU&NUIIWFl7~Cj(N{tTG8F5-JuX#{%{;-^I~T zO=@UeP}$6tBf*4Pxh|!Q%oAOaY|+fIk_Mhb-pjBqK+icn-{wxv|jg;~=@f!WsM zRBnmMSMfy624?W4w)oar5hNf$MY{$yno%%gz=32g;mHLHdSIKfaDy>TiwqE=)JAgR zh0#0+%hrAD6wbg%fE=mZ;;1Z`kDv?2s7o1zX}N*W1a99~345GL3AhgC^l1FqvRm*- zA{xDUaw6TqwtMk~`<*j}j{If`gqR*0=cKql(!l19y25)JOWe8XK6-=_+KU=fO3c<) z-_p@G+AC0wnW3yMmvkqf-e4nDPj<UOB`l*ysr`~M3$D;G0P6(R$mQ50r>;XWe%rB7 zheAvUF~i_TACO<4VCZEOe&FKll3o=hO-=42sWY&K{xgAO3RkSrYzFY|F3}NvINME< z#4{(r1toO!3c*8mjDah8n~qF0=*ShCJicg#C=1+__n5(^KIo27Ns?c#O+Y7D^S`1{ zl|a@m9v&DQcxem?L*em(fo_y>gBAvU9ce)VAI^S<VGj9lsVZ(7<Kk*)efo$0Ae;r` zS;v_^85Ykb10KQ?xlH&dECQHHFebdY96@6af&1tvT#wGs3Zoww0>8=NfJ;G(TO}^x zl%>t+1ju{t!S#F2;TC1+xRDMoPus8?T#6B<wv4#JDhy~B1kvdUG}yH_#b{&+SHlr7 z0wiF1VnH&@+9S(-E|Egfpoy_mq0X31>#%5}ECh}<se5Kl1xA3F_t>yNHov1^FC-*X zj*%4nBLP+TGz^7of1%mRx|ScGUe)w{`9VAy;@G(?G@x}B+qI}?JprO$<7}A7u#tyQ h@Pq#O+MU%g)p8l!?EUZ;009600|1O?N&3_V001ITxQYM( literal 0 HcmV?d00001 diff --git a/HELP.md b/HELP.md deleted file mode 100644 index 4474ad8..0000000 --- a/HELP.md +++ /dev/null @@ -1,68 +0,0 @@ -j2s: Deriving stratified effects from joint models investigating Gene-Environment Interactions -====== - -The python3 script **j2s.py** allows for the estimation of genetic effect sizes in unexposed and exposed individuals separately from joint models investigating Gene-Environment interactions when the exposure is binary. A pre-print of the publication is available [here](https://www.biorxiv.org/content/10.1101/693218v1). 
- -Prerequisite ------------- - -To execute the script, python3 must be installed as weel as the following Python packages: - + pandas (version 0.22.0) - + numpy (version 1.13.3) - + scipy (version 0.19.1) - -Execution ------------- - -To use the script, type the following command: - -``` bash -python3 INFILE N N_EXPOSED OUTFILE -``` - -with: - * INFILE is the path to the input file describing the summary statitics in the joint model, - * N is the total sample size, - * N_EXPO is the number of exposed (E = 1) individuals, - * OUTFILE is the path to the outputfile. - -Description of the input files ------------- - -The input file has 7 mandatory columns: - * the identifier of the variant (e.g rs number) labelled 'MarkerName' - * the main genetic effect size labelled 'Effect' - * the standard error of the main genetic effect size labelled 'StdErr' - * the interaction effect size labelled 'IntEffect' - * the standard error of the interaction effect size labelled 'IntStdErr' - * the covariance between the main genetic effect size and the interaction effect size labelled 'IntCov' - * the sample size of the variant labelled 'N' - -| MarkerName | Effect |StdErr |IntEffect |IntStdErr |IntCov |N | -| ------------ | -------- | ----------- | ----------- | ------------ | -------------| ------ | -| rs1 | 0.06464 | 0.09853 | 0.03685 | 0.1539 | -0.009603 | 302478 | -| rs2 | 0.1482 | 0.0439 | 0.1071 | 0.05265 | -0.001922 | 551772 | -| rs3 | 0.2428 | 0.06389 | 0.1027 | 0.07648 | -0.004073 | 537523 | -| rs4 | -0.1821 | 0.05173 | -0.06716 | 0.06105 | -0.002671 | 540537 | -| rs4 | -0.1665 | 0.04127 | -0.05506 | 0.0498 | -0.001692 | 527550 | - -This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/)) - -Short tutorial --------------- - - -Bug report / Help ------------------ - -Please open an issue if you find a bug. 
- -Code of conduct ---------------- - -Please note that this project is released with a [Contributor Code of Conduct](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/code-of-conduct.md). By participating in this project you agree to abide by its terms. - -License -------- - -This project is licensed under the MIT License - see the [LICENSE.md](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/LICENSE) file for details diff --git a/README.md b/README.md index 4474ad8..32cdd06 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ with: * N_EXPO is the number of exposed (E = 1) individuals, * OUTFILE is the path to the outputfile. -Description of the input files +Description of the input file ------------ The input file has 7 mandatory columns: @@ -48,9 +48,37 @@ The input file has 7 mandatory columns: This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/)) -Short tutorial +Description of the output file -------------- +The output file has 13 columns: + * the identifier of the variant (e.g. rs number) labelled 'MarkerName' + * the marginal genetic effect size in exposed individuals labelled 'Exp_eff' + * the standard error of the marginal genetic effect size in exposed individuals labelled 'Exp_eff_sd' + * the p-value of the marginal genetic effect size in exposed individuals labelled 'Exp_p' + * the sample size in exposed individuals labelled 'Exp_N' + * the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff' + * the standard error of the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff_sd' + * the p-value of the marginal genetic effect size in unexposed individuals labelled 'Unexp_p' + * the sample size in unexposed individuals labelled 'Unexp_N' + * the marginal genetic effect size in the whole sample labelled 'Marg_eff' + * the standard error of the marginal genetic effect size in 
the whole sample labelled 'Marg_eff_sd' + * the p-value of the marginal genetic effect size in the whole sample labelled 'Marg_p' + * the sample size in the whole sample labelled 'Marg_N' + + + MarkerName | Exp_eff | Exp_eff_sd | Exp_p | Exp_N | Unexp_eff | Unexp_eff_sd | Unexp_p | Unexp_N | Marg_eff | Marg_eff_sd | Marg_p | Marg_N + --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- + rs2 | 0.25529999999999997 | 0.029244358430302427 | 2.5492291647780578e-18 | 235938.0 | 0.1482 | 0.0439 | 0.0007358690030330429 | 315833.0 | 0.1939961385 | 0.02811324629168079 | 5.181096635559163e-12 | 551772 + rs3 | 0.3455 | 0.04225071005320504 | 2.9008842650020812e-16 | 229845.0 | 0.2428 | 0.06389 | 0.0001445316729934853 | 307677.0 | 0.28671469116666665 | 0.040843280080164183 | 2.2206797794728803e-12 | 537523 + rs4 | -0.24926 | 0.03257445932014834 | 1.9786602680323982e-14 | 231134.0 | -0.1821 | 0.05173 | 0.00043121980737467655 | 309402.0 | -0.21081772793333334 | 0.032760038001211925 | 1.233026411024831e-10 | 540537 + rs5 | -0.22156 | 0.028271061175696964 | 4.614778694925717e-15 | 225581.0 | -0.1665 | 0.04127 | 5.474023450141706e-05 | 301968.0 | -0.1900437477666667 | 0.026639580953178844 | 9.757377728977485e-13 | 527550 + rs6 | 0.06733 | 0.026954250128690283 | 0.01249186364041974 | 227516.0 | 0.09873 | 0.0404 | 0.014533002227448433 | 304559.0 | 0.08530330766666666 | 0.025886829090553485 | 0.000983377501863686 | 532076 + rs7 | 0.07570000000000002 | 0.02701119212474709 | 0.005070161655305943 | 231360.0 | 0.2008 | 0.04291 | 2.8748859265040098e-06 | 309705.0 | 0.14730703150000002 | 0.027198241852565627 | 6.093014771116645e-08 | 541066 + rs8 | 0.03294999999999999 | 0.03900954242233562 | 0.39829783686389786 | 167496.0 | -0.04443 | 0.07288 | 0.5421054285558802 | 224214.0 | -0.011342183033333335 | 0.04500115076521829 | 0.8010085487588172 | 391711 + rs9 | -0.4284 | 0.06546846569150679 | 6.005193234873927e-11 | 217344.0 | -0.232 | 0.1064 | 0.029224037771486945 | 
290943.0 | -0.31598096733333336 | 0.06717839018056906 | 2.5560091937373993e-06 | 508288 + rs10 | 0.1325 | 0.03364058560726908 | 8.192605950085655e-05 | 171878.0 | 0.2414 | 0.06343 | 0.00014136362518112045 | 230080.0 | 0.1948341785 | 0.0390928641631008 | 6.232145316728965e-07 | 401959 + Bug report / Help ----------------- diff --git a/example_input.txt b/example_input.txt new file mode 100644 index 0000000..c9c03b6 --- /dev/null +++ b/example_input.txt @@ -0,0 +1,11 @@ +MarkerName Effect StdErr IntEffect IntStdErr IntCov N +rs1 0.06464 0.09852999999999999 0.03685 0.1539 -0.009603 302478 +rs2 0.1482 0.0439 0.1071 0.05265 -0.0019219999999999999 551772 +rs3 0.2428 0.06389 0.1027 0.07647999999999999 -0.004072999999999999 537523 +rs4 -0.1821 0.05173 -0.06716 0.06105 -0.002671 540537 +rs5 -0.1665 0.04127 -0.055060000000000005 0.0498 -0.0016920000000000001 527550 +rs6 0.09873 0.0404 -0.0314 0.04846 -0.001627 532076 +rs7 0.2008 0.042910000000000004 -0.1251 0.05058 -0.001835 541066 +rs8 -0.044430000000000004 0.07288 0.07737999999999999 0.0825 -0.005298 391711 +rs9 -0.23199999999999998 0.1064 -0.1964 0.1246 -0.01128 508288 +rs10 0.2414 0.06343 -0.1089 0.07171 -0.004017 401959 diff --git a/example_output.txt.gz b/example_output.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085 GIT binary patch literal 907 zcmV;619bc!iwFoRpd?)a|7Cb#ZE$R5UvG7EaCLMpba-?CR8&iDR6z_}ckYsr|DOY} zBs<n95)&2(6on1Pr`-L5LPwf6d1JTB<tl%Ee*1BMdwTwPU!VTGe!aiEyg$GG{<goq z?$4+7<=6cR`1fr>&Ofi$^J#s4e*3<o-oH%H{{DI%PY-XuWzkLCr9Yg+JCYVN^)e+T zG07!2w-#v*%PjJ&`y<y9_13x(lGtkQBh#J-QR0GV!VRiq(o5M(^#r$e2W$B*SIjCF zbA3_v@*5XZN2t5wrJ|TDcg_^xRwSwU&NUIIWFl7~Cj(N{tTG8F5-JuX#{%{;-^I~T zO=@UeP}$6tBf*4Pxh|!Q%oAOaY|+fIk_Mhb-pjBqK+icn-{wxv|jg;~=@f!WsM zRBnmMSMfy624?W4w)oar5hNf$MY{$yno%%gz=32g;mHLHdSIKfaDy>TiwqE=)JAgR zh0#0+%hrAD6wbg%fE=mZ;;1Z`kDv?2s7o1zX}N*W1a99~345GL3AhgC^l1FqvRm*- zA{xDUaw6TqwtMk~`<*j}j{If`gqR*0=cKql(!l19y25)JOWe8XK6-=_+KU=fO3c<) 
z-_p@G+AC0wnW3yMmvkqf-e4nDPj<UOB`l*ysr`~M3$D;G0P6(R$mQ50r>;XWe%rB7 zheAvUF~i_TACO<4VCZEOe&FKll3o=hO-=42sWY&K{xgAO3RkSrYzFY|F3}NvINME< z#4{(r1toO!3c*8mjDah8n~qF0=*ShCJicg#C=1+__n5(^KIo27Ns?c#O+Y7D^S`1{ zl|a@m9v&DQcxem?L*em(fo_y>gBAvU9ce)VAI^S<VGj9lsVZ(7<Kk*)efo$0Ae;r` zS;v_^85Ykb10KQ?xlH&dECQHHFebdY96@6af&1tvT#wGs3Zoww0>8=NfJ;G(TO}^x zl%>t+1ju{t!S#F2;TC1+xRDMoPus8?T#6B<wv4#JDhy~B1kvdUG}yH_#b{&+SHlr7 z0wiF1VnH&@+9S(-E|Egfpoy_mq0X31>#%5}ECh}<se5Kl1xA3F_t>yNHov1^FC-*X zj*%4nBLP+TGz^7of1%mRx|ScGUe)w{`9VAy;@G(?G@x}B+qI}?JprO$<7}A7u#tyQ h@Pq#O+MU%g)p8l!?EUZ;009600|1O?N&3_V001ITxQYM( literal 0 HcmV?d00001 diff --git a/j2s.py b/j2s.py index 02065c3..617309b 100644 --- a/j2s.py +++ b/j2s.py @@ -12,7 +12,7 @@ stt = time.time() # Checking number of arguments (4 expected) -if len(sys.argv) != 4 : +if len(sys.argv) != 5 : print("Incorrect call: wrong number of arguments\n") sys.exit(1) @@ -24,7 +24,7 @@ outfile = sys.argv[4] print("Arguments passed:") print("\tSample size = " + str(samplesize)) print("\tN exposed = " + str(Nexpo)) -print("Output file = " + outfile + "\n") +print("\tOutput file = " + outfile + "\n") print("\n") meanE = Nexpo / samplesize @@ -56,6 +56,6 @@ df['Marg_p'] = chi2.sf((df['Marg_eff'] / df['Marg_eff_sd']) ** 2, 1) df['Marg_N'] = df['N'] # Writing output file -df.loc[:,['rsID', 'Chr', 'BP', 'MarkerName', 'Allele1', 'Allele2', 'Freq1', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip') +df.loc[:,['MarkerName', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip') print("Analysis finished ") -- GitLab