diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..13486c54305a559c0a4764fc262916cd781d08ab --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +j2s_old_output.py + diff --git a/EXAMPLE/example_input.txt b/EXAMPLE/example_input.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9c03b695912110be8375a3fdaf97418c78aee98 --- /dev/null +++ b/EXAMPLE/example_input.txt @@ -0,0 +1,11 @@ +MarkerName Effect StdErr IntEffect IntStdErr IntCov N +rs1 0.06464 0.09852999999999999 0.03685 0.1539 -0.009603 302478 +rs2 0.1482 0.0439 0.1071 0.05265 -0.0019219999999999999 551772 +rs3 0.2428 0.06389 0.1027 0.07647999999999999 -0.004072999999999999 537523 +rs4 -0.1821 0.05173 -0.06716 0.06105 -0.002671 540537 +rs5 -0.1665 0.04127 -0.055060000000000005 0.0498 -0.0016920000000000001 527550 +rs6 0.09873 0.0404 -0.0314 0.04846 -0.001627 532076 +rs7 0.2008 0.042910000000000004 -0.1251 0.05058 -0.001835 541066 +rs8 -0.044430000000000004 0.07288 0.07737999999999999 0.0825 -0.005298 391711 +rs9 -0.23199999999999998 0.1064 -0.1964 0.1246 -0.01128 508288 +rs10 0.2414 0.06343 -0.1089 0.07171 -0.004017 401959 diff --git a/EXAMPLE/example_output.txt.gz b/EXAMPLE/example_output.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085 Binary files /dev/null and b/EXAMPLE/example_output.txt.gz differ diff --git a/HELP.md b/HELP.md deleted file mode 100644 index 4474ad82b10b76871c7eb87224fb7b5c0183088c..0000000000000000000000000000000000000000 --- a/HELP.md +++ /dev/null @@ -1,68 +0,0 @@ -j2s: Deriving stratified effects from joint models investigating Gene-Environment Interactions -====== - -The python3 script **j2s.py** allows for the estimation of genetic effect sizes in unexposed and exposed individuals separately from joint models investigating Gene-Environment interactions when the exposure is binary. 
A pre-print of the publication is available [here](https://www.biorxiv.org/content/10.1101/693218v1). - -Prerequisite ------------- - -To execute the script, python3 must be installed as weel as the following Python packages: - + pandas (version 0.22.0) - + numpy (version 1.13.3) - + scipy (version 0.19.1) - -Execution ------------- - -To use the script, type the following command: - -``` bash -python3 INFILE N N_EXPOSED OUTFILE -``` - -with: - * INFILE is the path to the input file describing the summary statitics in the joint model, - * N is the total sample size, - * N_EXPO is the number of exposed (E = 1) individuals, - * OUTFILE is the path to the outputfile. - -Description of the input files ------------- - -The input file has 7 mandatory columns: - * the identifier of the variant (e.g rs number) labelled 'MarkerName' - * the main genetic effect size labelled 'Effect' - * the standard error of the main genetic effect size labelled 'StdErr' - * the interaction effect size labelled 'IntEffect' - * the standard error of the interaction effect size labelled 'IntStdErr' - * the covariance between the main genetic effect size and the interaction effect size labelled 'IntCov' - * the sample size of the variant labelled 'N' - -| MarkerName | Effect |StdErr |IntEffect |IntStdErr |IntCov |N | -| ------------ | -------- | ----------- | ----------- | ------------ | -------------| ------ | -| rs1 | 0.06464 | 0.09853 | 0.03685 | 0.1539 | -0.009603 | 302478 | -| rs2 | 0.1482 | 0.0439 | 0.1071 | 0.05265 | -0.001922 | 551772 | -| rs3 | 0.2428 | 0.06389 | 0.1027 | 0.07648 | -0.004073 | 537523 | -| rs4 | -0.1821 | 0.05173 | -0.06716 | 0.06105 | -0.002671 | 540537 | -| rs4 | -0.1665 | 0.04127 | -0.05506 | 0.0498 | -0.001692 | 527550 | - -This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/)) - -Short tutorial --------------- - - -Bug report / Help ------------------ - 
-Please open an issue if you find a bug. - -Code of conduct ---------------- - -Please note that this project is released with a [Contributor Code of Conduct](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/code-of-conduct.md). By participating in this project you agree to abide by its terms. - -License -------- - -This project is licensed under the MIT License - see the [LICENSE.md](https://gitlab.pasteur.fr/statistical-genetics/j2s/blob/master/LICENSE) file for details diff --git a/README.md b/README.md index 4474ad82b10b76871c7eb87224fb7b5c0183088c..32cdd0679000dbfd323e1128115e69e1c7ba6d51 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ with: * N_EXPO is the number of exposed (E = 1) individuals, * OUTFILE is the path to the outputfile. -Description of the input files +Description of the input file ------------ The input file has 7 mandatory columns: @@ -48,9 +48,37 @@ The input file has 7 mandatory columns: This format corresponds to the output of the METAL software performing the joint test ([Manning et al, 2011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312394/)) -Short tutorial +Description of the output file -------------- +The output file has 13 columns: + * the identifier of the variant (e.g rs number) labelled 'MarkerName' + * the marginal genetic effect size in exposed individuals labelled 'Exp_eff' + * the standard error of the marginal genetic effect size in exposed individuals labelled 'Exp_eff_sd' + * the p-value of the marginal genetic effect size in exposed individuals labelled 'Exp_p' + * the sample size in exposed individuals labelled 'Exp_N' + * the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff' + * the standard error of the marginal genetic effect size in unexposed individuals labelled 'Unexp_eff_sd' + * the p-value of the marginal genetic effect size in unexposed individuals labelled 'Unexp_p' + * the sample size in unexposed individuals labelled 'Unexp_N' + * the marginal genetic effect 
size in the whole sample labelled 'Marg_eff' + * the standard error of the marginal genetic effect size in the whole sample labelled 'Marg_eff_sd' + * the p-value of the marginal genetic effect size in the whole sample labelled 'Marg_p' + * the sample size in the whole sample labelled 'Marg_N' + + + MarkerName | Exp_eff | Exp_eff_sd | Exp_p | Exp_N | Unexp_eff | Unexp_eff_sd | Unexp_p | Unexp_N | Marg_eff | Marg_eff_sd | Marg_p | Marg_N + --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- + rs2 | 0.25529999999999997 | 0.029244358430302427 | 2.5492291647780578e-18 | 235938.0 | 0.1482 | 0.0439 | 0.0007358690030330429 | 315833.0 | 0.1939961385 | 0.02811324629168079 | 5.181096635559163e-12 | 551772 + rs3 | 0.3455 | 0.04225071005320504 | 2.9008842650020812e-16 | 229845.0 | 0.2428 | 0.06389 | 0.0001445316729934853 | 307677.0 | 0.28671469116666665 | 0.040843280080164183 | 2.2206797794728803e-12 | 537523 + rs4 | -0.24926 | 0.03257445932014834 | 1.9786602680323982e-14 | 231134.0 | -0.1821 | 0.05173 | 0.00043121980737467655 | 309402.0 | -0.21081772793333334 | 0.032760038001211925 | 1.233026411024831e-10 | 540537 + rs5 | -0.22156 | 0.028271061175696964 | 4.614778694925717e-15 | 225581.0 | -0.1665 | 0.04127 | 5.474023450141706e-05 | 301968.0 | -0.1900437477666667 | 0.026639580953178844 | 9.757377728977485e-13 | 527550 + rs6 | 0.06733 | 0.026954250128690283 | 0.01249186364041974 | 227516.0 | 0.09873 | 0.0404 | 0.014533002227448433 | 304559.0 | 0.08530330766666666 | 0.025886829090553485 | 0.000983377501863686 | 532076 + rs7 | 0.07570000000000002 | 0.02701119212474709 | 0.005070161655305943 | 231360.0 | 0.2008 | 0.04291 | 2.8748859265040098e-06 | 309705.0 | 0.14730703150000002 | 0.027198241852565627 | 6.093014771116645e-08 | 541066 + rs8 | 0.03294999999999999 | 0.03900954242233562 | 0.39829783686389786 | 167496.0 | -0.04443 | 0.07288 | 0.5421054285558802 | 224214.0 | -0.011342183033333335 | 0.04500115076521829 | 0.8010085487588172 | 391711 + rs9 | 
-0.4284 | 0.06546846569150679 | 6.005193234873927e-11 | 217344.0 | -0.232 | 0.1064 | 0.029224037771486945 | 290943.0 | -0.31598096733333336 | 0.06717839018056906 | 2.5560091937373993e-06 | 508288 + rs10 | 0.1325 | 0.03364058560726908 | 8.192605950085655e-05 | 171878.0 | 0.2414 | 0.06343 | 0.00014136362518112045 | 230080.0 | 0.1948341785 | 0.0390928641631008 | 6.232145316728965e-07 | 401959 + Bug report / Help ----------------- diff --git a/example_input.txt b/example_input.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9c03b695912110be8375a3fdaf97418c78aee98 --- /dev/null +++ b/example_input.txt @@ -0,0 +1,11 @@ +MarkerName Effect StdErr IntEffect IntStdErr IntCov N +rs1 0.06464 0.09852999999999999 0.03685 0.1539 -0.009603 302478 +rs2 0.1482 0.0439 0.1071 0.05265 -0.0019219999999999999 551772 +rs3 0.2428 0.06389 0.1027 0.07647999999999999 -0.004072999999999999 537523 +rs4 -0.1821 0.05173 -0.06716 0.06105 -0.002671 540537 +rs5 -0.1665 0.04127 -0.055060000000000005 0.0498 -0.0016920000000000001 527550 +rs6 0.09873 0.0404 -0.0314 0.04846 -0.001627 532076 +rs7 0.2008 0.042910000000000004 -0.1251 0.05058 -0.001835 541066 +rs8 -0.044430000000000004 0.07288 0.07737999999999999 0.0825 -0.005298 391711 +rs9 -0.23199999999999998 0.1064 -0.1964 0.1246 -0.01128 508288 +rs10 0.2414 0.06343 -0.1089 0.07171 -0.004017 401959 diff --git a/example_output.txt.gz b/example_output.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a3662bd61863993934943b4714b1ba1f748085 Binary files /dev/null and b/example_output.txt.gz differ diff --git a/j2s.py b/j2s.py index 02065c39db0c8667c9faf7a93120b9ae146d0f03..617309b69e27db2b451116255149684999b6693d 100644 --- a/j2s.py +++ b/j2s.py @@ -12,7 +12,7 @@ stt = time.time() # Checking number of arguments (4 expected) -if len(sys.argv) != 4 : +if len(sys.argv) != 5 : print("Incorrect call: wrong number of arguments\n") sys.exit(1) @@ -24,7 +24,7 @@ outfile = sys.argv[4] print("Arguments passed:") 
print("\tSample size = " + str(samplesize)) print("\tN exposed = " + str(Nexpo)) -print("Output file = " + outfile + "\n") +print("\tOutput file = " + outfile + "\n") print("\n") meanE = Nexpo / samplesize @@ -56,6 +56,6 @@ df['Marg_p'] = chi2.sf((df['Marg_eff'] / df['Marg_eff_sd']) ** 2, 1) df['Marg_N'] = df['N'] # Writing output file -df.loc[:,['rsID', 'Chr', 'BP', 'MarkerName', 'Allele1', 'Allele2', 'Freq1', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip') +df.loc[:,['MarkerName', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip') print("Analysis finished ")