From 1f8e9867b86344d69b37bc4ff44648bddd14ccd3 Mon Sep 17 00:00:00 2001 From: hjulienn <hanna.julienne@pasteur.fr> Date: Tue, 14 Nov 2023 17:50:27 +0100 Subject: [PATCH] added title and description to test data --- jass/models/inittable.py | 18 +++++++++++++++--- jass/test/data_real/summary.csv | 0 jass/test/data_test1/initTable.hdf5 | Bin 693544 -> 699688 bytes jass/test/data_test2/initTable.hdf5 | Bin 786371 -> 792515 bytes scripts/hdf5_add_attributes.py | 23 +++++++++++++++++++++++ 5 files changed, 38 insertions(+), 3 deletions(-) mode change 100755 => 100644 jass/test/data_real/summary.csv create mode 100644 scripts/hdf5_add_attributes.py diff --git a/jass/models/inittable.py b/jass/models/inittable.py index a7cd1b5e..f49fef2e 100644 --- a/jass/models/inittable.py +++ b/jass/models/inittable.py @@ -9,6 +9,7 @@ import re import glob import logging from pandas import HDFStore, DataFrame, read_csv, concat, options, read_hdf +import h5py import pandas as pd # create (or open) an hdf5 file and opens in append mode import numpy as np @@ -29,10 +30,13 @@ class InitMeta(object): def get_inittable_meta(file_name): init_store = HDFStore(file_name, mode='r') nb_snps = init_store.get_storer("SumStatTab").nrows - name=f"Name of {file_name.split('/')[-1]}" - desc=f"Description {file_name.split('/')[-1]}" init_store.close() nb_phenotypes = read_hdf(file_name, "PhenoList").shape[0] + + f = h5py.File(file_name, mode='r') + name=f.attrs['title'] + desc=f.attrs["description"] + f.close() return dict( nb_snps=int(nb_snps), nb_phenotypes=int(nb_phenotypes), @@ -40,6 +44,15 @@ def get_inittable_meta(file_name): desc=desc, ) +def add_inittable_meta(file_name, title, description): + """ + add description in hdf5 attributes + """ + f = h5py.File(file_name, mode='a') + f.attrs['title'] = title + f.attrs["description"] = description + f.close() + def get_gwasname(file_name): return "_".join(os.path.basename(file_name).split("_")[0:3]) @@ -51,7 +64,6 @@ def check_if_SNP_unique(z_gwas_chrom): ) raise IOError(msg) - def get_gwas_dict(input_data_path): gwas_dict = {} # retrieve all files corresponding to glob patterns diff --git a/jass/test/data_real/summary.csv b/jass/test/data_real/summary.csv old mode 100755 new mode 100644 diff --git a/jass/test/data_test1/initTable.hdf5 b/jass/test/data_test1/initTable.hdf5 index 904ed8c837cac132e7e8795377c2dd559e40ee4a..a6bea734cfee2da3ed7eb8b419ff703895375c29 100644 GIT binary patch delta 1005 zcmbVK&1w`u5boL4WD^q+`~l6Sz{_qhGscZD34$&QSqVZ8d+?_8&h*Z7?CBn+dl>gL z2M=BiB75>41VM@5JLKfqC-CYEh}FHTMo<t7YNorYy1uV|j^3;veO`aj@!lS;hu+e( zzSF$?{^?O;`squfb*}wtnOJ#8$aMd1bNcM0`TV5mp*B$M`3&k=)CJT<)E4T}$CKvr zqr;cbyjI_H?>(|WE|C^7+z5u2tDZ+L;NPm}B3kY{I}?)Nt^In2ZEY*${@lOf^2AI> z1<wsv(%Ju-@4RypGHAGj)82>H#p7>lyNmUpzyH9&Sq><tq3yk27Sym91`m5f&}Ej< zVglMw!?od&1|^|4E_g&GP#H08(2cguZ5RoqVu(~J4VZ#*qcI|eL`))<@n~kNHMG;D z8+HgXb7Co488E3b1{IGPL~{~<#{X9=y~_oIu_`iZfNL-*gPKcj3>}?vNW)}|6&nR3 z2BpSh#ZBtM_Y}u!dXkEX#tdo<!+A@Fzgp7X>kWsU8&IV@N`VTYDg;xIv%-QyRtm#6 zTuY#~ecefo$@NYUB)AuJ<mXfyRu=xa2!3{&ij9OxW_unSP_5lO5I<#DjS?!s5r`js zRs~PE#JRz^P#N@7CV?5G0y#w%ZVnYI7{s-zq)^n>>q2c$T<ShG6tm)V=G>~P^4W;{ zH1f+QK~hm2<mE^ptX)4HbZ>=WxV7EAu1y(F9_Lgt(eY==kmv7X*SHrrT!ch=7PGw> mc5tomCB$<|v!Pi2|4C@;=I!mU+wJ(Xg~Bha9)DfCvicog0x97D delta 90 zcmZ2+OKZg{tqGcpQzvR!FtSarw_z6DSh0d>y236dPA&!rSi!_F-QS*hdh;pf_EXG^ eK+FWhAT|pSvjQ<25VHd@2M}{^KgG-yHxU2^jvR~t diff --git a/jass/test/data_test2/initTable.hdf5 b/jass/test/data_test2/initTable.hdf5 index 035a9cc84f3df0ba479ce53a8ebd55b6f7953833..4737c04b0bda81e993626705b0325d71944700f0 100644 GIT binary patch delta 1015 zcmbVK!EO^V5cMX|rZf^oDnf7?L=TOiyDced+7nSEf+{2qLE?y<U1!(ewHMo~C8t#4 zz!k(HzrYPp4}3sw{UgMojGfRH2_%H2=e5V)ndkRrb{fq-Z@+KVZZtpK-3wNx*X+{u zuWvrprfbiF#_H^JmALVRkm=E0WBT@_{_dn6AZiG=zJ$1pSVAl#8i*Cdm2W5Y)$cz( zLjBg{q0bSJC9+N$#BwVbMs5ZHS;MbG;BPeiJYN%%;%>IwJc~8S<Hfq^^CZl~nirNU z>EnORx9*P#>DSH^){b9q43;PT-qR<(o+E+dVr{qYm6}?XK>vAn0H(}ys>fg~wcJ=9 z8&DFuBh6zffy$T}i^mw-*@vM}DuGy)(t_2LTZ1<eNX0m2IgjVDiGj^5ZFgD(srlhJ zy3uD+<qRsGFo+i<{#^fGYnew}Fc>MFQw!XH%@|C$<kr&Rg#&4sOz_24!H_{|a9MGi z`Rj*@<7>K-im|~AY6OFYB!hEFHV?alLF*n=8ILodLZ}LhDJYnB#UU?+<y&47m^fVz zGHVNS5Jf3A1|5b4HI|h+9O>vxX)3l-CYjSbdP$A(eqjBK;cJvoDOw<Y>8vzQxkTS! zq*V^Rj7eY?sX#%Ih4-Oinn5yAl@yA)_xh_&6Q8<AEyb+p&VpN2RUsSlkj7#8DoQJA zqM{rMEbIEuMD6>XPG@(&eaF}`8NVo~WTF+$lOfOFZ8z8#Xzoa($6naYolXne3jc&; jL1}J^<Nu$8cDEnwciQb%IBzJN;rRC2)!_JOqtW~caEU59 delta 95 zcmX@S-r(?m{Rx^(|G6h>n=`Ua7c5~D-B_`LX<8s72X_Yp1gv0UnBHK{F}?W|bNeY~ hMj&PaVi21Jh*^P{4T#x+m;;D8ftYLiDQ51UzX8VYAISg! diff --git a/scripts/hdf5_add_attributes.py b/scripts/hdf5_add_attributes.py new file mode 100644 index 00000000..8b5ab657 --- /dev/null +++ b/scripts/hdf5_add_attributes.py @@ -0,0 +1,23 @@ + +from jass.models.inittable import get_inittable_meta, add_inittable_meta +# we need python package h5py to read/write .hdf5 file + + + +if __name__ == "__main__": + + title = 'Curated GWAS summary statistics on African ancestry on 19 blood count traits and glycemic traits (hg38)' + des = 'Genome wide curated summary statistics on 19 blood count traits and glycemic traits' \ + 'File format is the inittable format intended to be used with the Joint Analysis of Summary Statistics (JASS), which allows to perform multi-trait GWAS:' \ + 'https://gitlab.pasteur.fr/statistical-genetics/jass' \ + 'GWAS of hematological traits originate from Chen et al paper and were downloaded from the GWAS Catalog (https://www.ebi.ac.uk/gwas/publications/32888493#study_panel). GWAS of glycemic traits come from the (18) study downloadable from GWAS Catalog (https://www.ebi.ac.uk/gwas/publications/34059833).' + hdf5_file = '/pasteur/zeus/projets/p02/GGS_JASS/jass_pipeline_dev_copie/jass/jass/test/data_test2/initTable.hdf5' + + add_inittable_meta(hdf5_file, title, des) + + print(get_inittable_meta(hdf5_file)) + + + + + -- GitLab