diff --git a/jass/models/inittable.py b/jass/models/inittable.py
index a7cd1b5e16f70139d89f22ada36a185a78144715..f49fef2e3ad05c98ac271895238e8d7da3eb4232 100644
--- a/jass/models/inittable.py
+++ b/jass/models/inittable.py
@@ -9,6 +9,7 @@ import re
 import glob
 import logging
 from pandas import HDFStore, DataFrame, read_csv, concat, options, read_hdf
+import h5py
 import pandas as pd
 # create (or open) an hdf5 file and opens in append mode
 import numpy as np
@@ -29,10 +30,16 @@ class InitMeta(object):
 def get_inittable_meta(file_name):
     init_store = HDFStore(file_name, mode='r')
     nb_snps = init_store.get_storer("SumStatTab").nrows
-    name=f"Name of {file_name.split('/')[-1]}"
-    desc=f"Description {file_name.split('/')[-1]}"
     init_store.close()
     nb_phenotypes = read_hdf(file_name, "PhenoList").shape[0]
+
+    # Title and description are stored as attributes of the HDF5 file.
+    # Inittables written before these attributes existed fall back to the
+    # former file-name based placeholders instead of raising KeyError.
+    base_name = os.path.basename(file_name)
+    with h5py.File(file_name, mode='r') as f:
+        name = f.attrs.get('title', f"Name of {base_name}")
+        desc = f.attrs.get('description', f"Description {base_name}")
     return dict(
         nb_snps=int(nb_snps),
         nb_phenotypes=int(nb_phenotypes),
@@ -40,6 +47,23 @@ def get_inittable_meta(file_name):
         desc=desc,
     )
 
+
+def add_inittable_meta(file_name, title, description):
+    """Store a title and a description as attributes of an inittable.
+
+    The HDF5 file is opened in append mode ('a') and the values are written
+    to its 'title' and 'description' attributes, which is where
+    get_inittable_meta reads them back from.
+
+    :param file_name: path of the HDF5 inittable to annotate
+    :param title: value written to the 'title' attribute
+    :param description: value written to the 'description' attribute
+    """
+    # The context manager closes the file even when a write fails.
+    with h5py.File(file_name, mode='a') as f:
+        f.attrs['title'] = title
+        f.attrs['description'] = description
+
 
 def get_gwasname(file_name):
     return "_".join(os.path.basename(file_name).split("_")[0:3])
diff --git a/jass/test/data_real/summary.csv b/jass/test/data_real/summary.csv
old mode 100755
new mode 100644
diff --git a/jass/test/data_test1/initTable.hdf5 b/jass/test/data_test1/initTable.hdf5
index 904ed8c837cac132e7e8795377c2dd559e40ee4a..a6bea734cfee2da3ed7eb8b419ff703895375c29 100644
Binary files a/jass/test/data_test1/initTable.hdf5 and b/jass/test/data_test1/initTable.hdf5 differ
diff --git a/jass/test/data_test2/initTable.hdf5 b/jass/test/data_test2/initTable.hdf5
index 035a9cc84f3df0ba479ce53a8ebd55b6f7953833..4737c04b0bda81e993626705b0325d71944700f0 100644
Binary files a/jass/test/data_test2/initTable.hdf5 and b/jass/test/data_test2/initTable.hdf5 differ
diff --git a/scripts/hdf5_add_attributes.py b/scripts/hdf5_add_attributes.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b5ab65729e6a37de281310a0bcb2d3783daccfa
--- /dev/null
+++ b/scripts/hdf5_add_attributes.py
@@ -0,0 +1,51 @@
+"""Write the 'title' and 'description' attributes of a JASS inittable.
+
+Usage:
+    python hdf5_add_attributes.py [HDF5_FILE] [--title TITLE] [--description TEXT]
+
+Every argument is optional; the defaults reproduce the annotation of the
+inittable of African ancestry blood count and glycemic traits.
+"""
+import argparse
+
+# jass.models.inittable needs the h5py package to read/write .hdf5 files.
+from jass.models.inittable import add_inittable_meta, get_inittable_meta
+
+DEFAULT_HDF5_FILE = '/pasteur/zeus/projets/p02/GGS_JASS/jass_pipeline_dev_copie/jass/jass/test/data_test2/initTable.hdf5'
+DEFAULT_TITLE = 'Curated GWAS summary statistics on African ancestry on 19 blood count traits and glycemic traits (hg38)'
+# Adjacent literals are concatenated without any separator, so every fragment
+# that does not end the text carries its own trailing space.
+DEFAULT_DESCRIPTION = (
+    'Genome wide curated summary statistics on 19 blood count traits and glycemic traits. '
+    'File format is the inittable format intended to be used with the Joint Analysis of Summary Statistics (JASS), which allows to perform multi-trait GWAS: '
+    'https://gitlab.pasteur.fr/statistical-genetics/jass '
+    'GWAS of hematological traits originate from Chen et al paper and were downloaded from the GWAS Catalog (https://www.ebi.ac.uk/gwas/publications/32888493#study_panel). GWAS of glycemic traits come from the (18) study downloadable from GWAS Catalog (https://www.ebi.ac.uk/gwas/publications/34059833).'
+)
+
+
+def main():
+    """Parse the command line, write the attributes and print the metadata."""
+    parser = argparse.ArgumentParser(
+        description="Add a title and a description to a JASS inittable."
+    )
+    parser.add_argument(
+        'hdf5_file', nargs='?', default=DEFAULT_HDF5_FILE,
+        help="path of the inittable to annotate",
+    )
+    parser.add_argument(
+        '--title', default=DEFAULT_TITLE,
+        help="text stored in the 'title' attribute",
+    )
+    parser.add_argument(
+        '--description', default=DEFAULT_DESCRIPTION,
+        help="text stored in the 'description' attribute",
+    )
+    args = parser.parse_args()
+
+    add_inittable_meta(args.hdf5_file, args.title, args.description)
+    # Read the metadata back to confirm what was written.
+    print(get_inittable_meta(args.hdf5_file))
+
+
+if __name__ == "__main__":
+    main()