From 011144961db54bc378b0c5daabd4f83a7177bb5c Mon Sep 17 00:00:00 2001
From: vlaville <vincent.laville@pasteur.fr>
Date: Tue, 9 Jul 2019 17:57:55 +0200
Subject: [PATCH] README

---
 .gitignore                    |   2 +
 EXAMPLE/example_input.txt     |  11 ++++++
 EXAMPLE/example_output.txt.gz | Bin 0 -> 907 bytes
 HELP.md                       |  68 ----------------------------------
 README.md                     |  32 +++++++++++++++-
 example_input.txt             |  11 ++++++
 example_output.txt.gz         | Bin 0 -> 907 bytes
 j2s.py                        |   6 +--
 8 files changed, 57 insertions(+), 73 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 EXAMPLE/example_input.txt
 create mode 100644 EXAMPLE/example_output.txt.gz
 delete mode 100644 HELP.md
 create mode 100644 example_input.txt
 create mode 100644 example_output.txt.gz

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..13486c5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+j2s_old_output.py
+
diff --git a/EXAMPLE/example_input.txt b/EXAMPLE/example_input.txt
new file mode 100644
index 0000000..c9c03b6
--- /dev/null
+++ b/EXAMPLE/example_input.txt
@@ -0,0 +1,11 @@
+MarkerName	Effect	StdErr	IntEffect	IntStdErr	IntCov	N
+rs1	0.06464	0.09852999999999999	0.03685	0.1539	-0.009603	302478
+rs2	0.1482	0.0439	0.1071	0.05265	-0.0019219999999999999	551772
+rs3	0.2428	0.06389	0.1027	0.07647999999999999	-0.004072999999999999	537523
+rs4	-0.1821	0.05173	-0.06716	0.06105	-0.002671	540537
+rs5	-0.1665	0.04127	-0.055060000000000005	0.0498	-0.0016920000000000001	527550
+rs6	0.09873	0.0404	-0.0314	0.04846	-0.001627	532076
+rs7	0.2008	0.042910000000000004	-0.1251	0.05058	-0.001835	541066
+rs8	-0.044430000000000004	0.07288	0.07737999999999999	0.0825	-0.005298	391711
+rs9	-0.23199999999999998	0.1064	-0.1964	0.1246	-0.01128	508288
+rs10	0.2414	0.06343	-0.1089	0.07171	-0.004017	401959
diff --git a/EXAMPLE/example_output.txt.gz b/EXAMPLE/example_output.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085
GIT binary patch
literal 907
zcmV;619bc!iwFoRpd?)a|7Cb#ZE$R5UvG7EaCLMpba-?CR8&iDR6z_}ckYsr|DOY}
zBs<n95)&2(6on1Pr`-L5LPwf6d1JTB<tl%Ee*1BMdwTwPU!VTGe!aiEyg$GG{<goq
z?$4+7<=6cR`1fr>&Ofi$^J#s4e*3<o-oH%H{{DI%PY-XuWzkLCr9Yg+JCYVN^)e+T
zG07!2w-#v*%PjJ&`y<y9_13x(lGtkQBh#J-QR0GV!VRiq(o5M(^#r$e2W$B*SIjCF
zbA3_v@*5XZN2t5wrJ|TDcg_^xRwSwU&NUIIWFl7~Cj(N{tTG8F5-JuX#{%{;-^I~T
zO=@UeP}$6tBf*4Pxh|!Q%oAOaY|+fI&#6k_Mhb-pjBqK+icn-{wxv|jg;~=@f!WsM
zRBnmMSMfy624?W4w)oar5hNf$MY{$yno%%gz=32g;mHLHdSIKfaDy>TiwqE=)JAgR
zh0#0+%hrAD6wbg%fE=mZ;;1Z`kDv?2s7o1zX}N*W1a99~345GL3AhgC^l1FqvRm*-
zA{xDUaw6TqwtMk~`<*j}j{If`gqR*0=cKql(!l19y25)JOWe8XK6-=_+KU=fO3c<)
z-_p@G+AC0wnW3yMmvkqf-e4nDPj<UOB`l*ysr`~M3$D;G0P6(R$mQ50r>;XWe%rB7
zheAvUF~i_TACO<4VCZEOe&FKll3o=hO-=42sWY&K{xgAO3RkSrYzFY|F3}NvINME<
z#4{(r1toO!3c*8mjDah8n~qF0=*ShCJicg#C=1+__n5(^KIo27Ns?c#O+Y7D^S`1{
zl|a@m9v&DQcxem?L*em(fo_y>gBAvU9ce)VAI^S<VGj9lsVZ(7<Kk*)efo$0Ae;r`
zS;v_^85Ykb10KQ?xlH&dECQHHFebdY96@6af&1tvT#wGs3Zoww0>8=NfJ;G(TO}^x
zl%>t+1ju{t!S#F2;TC1+xRDMoPus8?T#6B<wv4#JDhy~B1kvdUG}yH_#b{&+SHlr7
z0wiF1VnH&@+9S(-E|Egfpoy_mq0X31>#%5}ECh}<se5Kl1xA3F_t>yNHov1^FC-*X
zj*%4nBLP+TGz^7of1%mRx|ScGUe)w{`9VAy;@G(?G@x}B+qI}?JprO$<7}A7u#tyQ
h@Pq#O+MU%g)p8l!?EUZ;009600|1O?N&3_V001ITxQYM(

literal 0
HcmV?d00001

diff --git a/HELP.md b/HELP.md
deleted file mode 100644
index 4474ad8..0000000
--- a/HELP.md
+++ /dev/null
@@ -1,68 +0,0 @@
-j2s: Deriving stratified effects from joint models investigating Gene-Environment Interactions
-======
-
-The python3 script **j2s.py** allows for the estimation of genetic effect sizes in unexposed and exposed individuals separately from joint models investigating Gene-Environment interactions when the exposure is binary. A pre-print of the publication is available [here](https://www.biorxiv.org/content/10.1101/693218v1).
-
-Prerequisite
-------------
-
-To execute the script, python3 must be installed as weel as the following Python packages:
-  + pandas (version 0.22.0)
-  + numpy (version 1.13.3)
-  + scipy (version 0.19.1)
-
-Execution
-------------
-
-To use the script, type the following command:
-
-``` bash
-python3 INFILE N N_EXPOSED OUTFILE
-```
-
-with:
-  * INFILE is the path to the input file describing the summary statitics in the joint model,
-  * N is the total sample size,
-  * N_EXPO is the number of exposed (E = 1) individuals,
-  * OUTFILE is the path to the outputfile.
-
-Description of the input files
-------------
-
-The input file has 7 mandatory columns:
-  * the identifier of the variant (e.g rs number) labelled 'MarkerName'
-  * the main genetic effect size labelled 'Effect'
-  * the standard error of the main genetic effect size labelled 'StdErr'
-  * the interaction effect size labelled 'IntEffect'
-  * the standard error of the interaction effect size labelled 'IntStdErr'
-  * the covariance between the main genetic effect size and the interaction effect size labelled 'IntCov'
-  * the sample size of the variant labelled 'N'
-
-| MarkerName   | Effect   |StdErr       |IntEffect    |IntStdErr     |IntCov        |N       |
-| ------------ | -------- | ----------- | ----------- | ------------ | -------------| ------ |
-| rs1          | 0.06464  | 0.09853     | 0.03685     |  0.1539      | -0.009603    | 302478 |
-| rs2          | 0.1482   | 0.0439      | 0.1071      |  0.05265     | -0.001922    | 551772 |
-| rs3          | 0.2428   | 0.06389     | 0.1027      |  0.07648     | -0.004073    | 537523 |
-| rs4          | -0.1821  | 0.05173     | -0.06716    |  0.06105     | -0.002671    | 540537 |
-| rs4          | -0.1665  | 0.04127     | -0.05506    |  0.0498      | -0.001692    | 527550 |
-
-This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/))
-
-Short tutorial
---------------
-
-
-Bug report / Help
------------------
-
-Please open an issue if you find a bug.
-
-Code of conduct
----------------
-
-Please note that this project is released with a [Contributor Code of Conduct](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/code-of-conduct.md). By participating in this project you agree to abide by its terms.
-
-License
--------
-
-This project is licensed under the MIT License - see the [LICENSE.md](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/LICENSE) file for details
diff --git a/README.md b/README.md
index 4474ad8..32cdd06 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ with:
   * N_EXPO is the number of exposed (E = 1) individuals,
   * OUTFILE is the path to the outputfile.
 
-Description of the input files
+Description of the input file
 ------------
 
 The input file has 7 mandatory columns:
@@ -48,9 +48,37 @@ The input file has 7 mandatory columns:
 
 This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/))
 
-Short tutorial
+Description of the output file
 --------------
 
+The output file has 13 columns:
+  * the identifier of the variant (e.g. rs number) labelled 'MarkerName'
+  * the marginal genetic effect size in exposed individuals labelled 'Exp_eff'
+  * the standard error of the marginal genetic effect size in exposed individuals labelled 'Exp_eff_sd'
+  * the p-value of the marginal genetic effect size in exposed individuals labelled 'Exp_p'
+  * the sample size in exposed individuals labelled 'Exp_N'
+  * the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff'
+  * the standard error of the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff_sd'
+  * the p-value of the marginal genetic effect size in unexposed individuals labelled 'Unexp_p'
+  * the sample size in unexposed individuals labelled 'Unexp_N'
+  * the marginal genetic effect size in the whole sample labelled 'Marg_eff'
+  * the standard error of the marginal genetic effect size in the whole sample labelled 'Marg_eff_sd'
+  * the p-value of the marginal genetic effect size in the whole sample labelled 'Marg_p'
+  * the sample size in the whole sample labelled 'Marg_N'
+
+
+  MarkerName | Exp_eff | Exp_eff_sd | Exp_p | Exp_N | Unexp_eff | Unexp_eff_sd | Unexp_p | Unexp_N | Marg_eff | Marg_eff_sd | Marg_p | Marg_N
+  --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | ---
+  rs2 | 0.25529999999999997 | 0.029244358430302427 | 2.5492291647780578e-18 | 235938.0 | 0.1482 | 0.0439 | 0.0007358690030330429 | 315833.0 | 0.1939961385 | 0.02811324629168079 | 5.181096635559163e-12 | 551772
+  rs3 | 0.3455 | 0.04225071005320504 | 2.9008842650020812e-16 | 229845.0 | 0.2428 | 0.06389 | 0.0001445316729934853 | 307677.0 | 0.28671469116666665 | 0.040843280080164183 | 2.2206797794728803e-12 | 537523
+  rs4 | -0.24926 | 0.03257445932014834 | 1.9786602680323982e-14 | 231134.0 | -0.1821 | 0.05173 | 0.00043121980737467655 | 309402.0 | -0.21081772793333334 | 0.032760038001211925 | 1.233026411024831e-10 | 540537
+  rs5 | -0.22156 | 0.028271061175696964 | 4.614778694925717e-15 | 225581.0 | -0.1665 | 0.04127 | 5.474023450141706e-05 | 301968.0 | -0.1900437477666667 | 0.026639580953178844 | 9.757377728977485e-13 | 527550
+  rs6 | 0.06733 | 0.026954250128690283 | 0.01249186364041974 | 227516.0 | 0.09873 | 0.0404 | 0.014533002227448433 | 304559.0 | 0.08530330766666666 | 0.025886829090553485 | 0.000983377501863686 | 532076
+  rs7 | 0.07570000000000002 | 0.02701119212474709 | 0.005070161655305943 | 231360.0 | 0.2008 | 0.04291 | 2.8748859265040098e-06 | 309705.0 | 0.14730703150000002 | 0.027198241852565627 | 6.093014771116645e-08 | 541066
+  rs8 | 0.03294999999999999 | 0.03900954242233562 | 0.39829783686389786 | 167496.0 | -0.04443 | 0.07288 | 0.5421054285558802 | 224214.0 | -0.011342183033333335 | 0.04500115076521829 | 0.8010085487588172 | 391711
+  rs9 | -0.4284 | 0.06546846569150679 | 6.005193234873927e-11 | 217344.0 | -0.232 | 0.1064 | 0.029224037771486945 | 290943.0 | -0.31598096733333336 | 0.06717839018056906 | 2.5560091937373993e-06 | 508288
+  rs10 | 0.1325 | 0.03364058560726908 | 8.192605950085655e-05 | 171878.0 | 0.2414 | 0.06343 | 0.00014136362518112045 | 230080.0 | 0.1948341785 | 0.0390928641631008 | 6.232145316728965e-07 | 401959
+
 
 Bug report / Help
 -----------------
diff --git a/example_input.txt b/example_input.txt
new file mode 100644
index 0000000..c9c03b6
--- /dev/null
+++ b/example_input.txt
@@ -0,0 +1,11 @@
+MarkerName	Effect	StdErr	IntEffect	IntStdErr	IntCov	N
+rs1	0.06464	0.09852999999999999	0.03685	0.1539	-0.009603	302478
+rs2	0.1482	0.0439	0.1071	0.05265	-0.0019219999999999999	551772
+rs3	0.2428	0.06389	0.1027	0.07647999999999999	-0.004072999999999999	537523
+rs4	-0.1821	0.05173	-0.06716	0.06105	-0.002671	540537
+rs5	-0.1665	0.04127	-0.055060000000000005	0.0498	-0.0016920000000000001	527550
+rs6	0.09873	0.0404	-0.0314	0.04846	-0.001627	532076
+rs7	0.2008	0.042910000000000004	-0.1251	0.05058	-0.001835	541066
+rs8	-0.044430000000000004	0.07288	0.07737999999999999	0.0825	-0.005298	391711
+rs9	-0.23199999999999998	0.1064	-0.1964	0.1246	-0.01128	508288
+rs10	0.2414	0.06343	-0.1089	0.07171	-0.004017	401959
diff --git a/example_output.txt.gz b/example_output.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085
GIT binary patch
literal 907
zcmV;619bc!iwFoRpd?)a|7Cb#ZE$R5UvG7EaCLMpba-?CR8&iDR6z_}ckYsr|DOY}
zBs<n95)&2(6on1Pr`-L5LPwf6d1JTB<tl%Ee*1BMdwTwPU!VTGe!aiEyg$GG{<goq
z?$4+7<=6cR`1fr>&Ofi$^J#s4e*3<o-oH%H{{DI%PY-XuWzkLCr9Yg+JCYVN^)e+T
zG07!2w-#v*%PjJ&`y<y9_13x(lGtkQBh#J-QR0GV!VRiq(o5M(^#r$e2W$B*SIjCF
zbA3_v@*5XZN2t5wrJ|TDcg_^xRwSwU&NUIIWFl7~Cj(N{tTG8F5-JuX#{%{;-^I~T
zO=@UeP}$6tBf*4Pxh|!Q%oAOaY|+fI&#6k_Mhb-pjBqK+icn-{wxv|jg;~=@f!WsM
zRBnmMSMfy624?W4w)oar5hNf$MY{$yno%%gz=32g;mHLHdSIKfaDy>TiwqE=)JAgR
zh0#0+%hrAD6wbg%fE=mZ;;1Z`kDv?2s7o1zX}N*W1a99~345GL3AhgC^l1FqvRm*-
zA{xDUaw6TqwtMk~`<*j}j{If`gqR*0=cKql(!l19y25)JOWe8XK6-=_+KU=fO3c<)
z-_p@G+AC0wnW3yMmvkqf-e4nDPj<UOB`l*ysr`~M3$D;G0P6(R$mQ50r>;XWe%rB7
zheAvUF~i_TACO<4VCZEOe&FKll3o=hO-=42sWY&K{xgAO3RkSrYzFY|F3}NvINME<
z#4{(r1toO!3c*8mjDah8n~qF0=*ShCJicg#C=1+__n5(^KIo27Ns?c#O+Y7D^S`1{
zl|a@m9v&DQcxem?L*em(fo_y>gBAvU9ce)VAI^S<VGj9lsVZ(7<Kk*)efo$0Ae;r`
zS;v_^85Ykb10KQ?xlH&dECQHHFebdY96@6af&1tvT#wGs3Zoww0>8=NfJ;G(TO}^x
zl%>t+1ju{t!S#F2;TC1+xRDMoPus8?T#6B<wv4#JDhy~B1kvdUG}yH_#b{&+SHlr7
z0wiF1VnH&@+9S(-E|Egfpoy_mq0X31>#%5}ECh}<se5Kl1xA3F_t>yNHov1^FC-*X
zj*%4nBLP+TGz^7of1%mRx|ScGUe)w{`9VAy;@G(?G@x}B+qI}?JprO$<7}A7u#tyQ
h@Pq#O+MU%g)p8l!?EUZ;009600|1O?N&3_V001ITxQYM(

literal 0
HcmV?d00001

diff --git a/j2s.py b/j2s.py
index 02065c3..617309b 100644
--- a/j2s.py
+++ b/j2s.py
@@ -12,7 +12,7 @@ stt = time.time()
 
 # Checking number of arguments (4 expected)
 
-if len(sys.argv) != 4 :
+if len(sys.argv) != 5 :
     print("Incorrect call: wrong number of arguments\n")
     sys.exit(1)
 
@@ -24,7 +24,7 @@ outfile = sys.argv[4]
 print("Arguments passed:")
 print("\tSample size = " + str(samplesize))
 print("\tN exposed = " + str(Nexpo))
-print("Output file = " + outfile + "\n")
+print("\tOutput file = " + outfile + "\n")
 print("\n")
 
 meanE = Nexpo / samplesize
@@ -56,6 +56,6 @@ df['Marg_p'] = chi2.sf((df['Marg_eff'] / df['Marg_eff_sd']) ** 2, 1)
 df['Marg_N'] = df['N']
 
 # Writing output file
-df.loc[:,['rsID', 'Chr', 'BP', 'MarkerName', 'Allele1', 'Allele2', 'Freq1', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip')
+df.loc[:,['MarkerName', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip')
 
 print("Analysis finished ")
-- 
GitLab