diff --git a/doc/Makefile b/doc/Makefile index 676d9ce78c73e6db75ab0005ba6552722d96edd3..0771baef159bcfc6c3f7ae0ace9b792a81e556c7 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/jass/models/inittable.py b/jass/models/inittable.py index 91d60957e7e5b07f33cdf42625873be8406505e5..719a92680e6ab8c86e7d7916d634b543c205728c 100755 --- a/jass/models/inittable.py +++ b/jass/models/inittable.py @@ -7,7 +7,7 @@ Created on Tue Mar 28 09:57:33 2017 import os import glob import logging -from pandas import HDFStore, DataFrame, read_csv, concat, options +from pandas import HDFStore, DataFrame, read_csv, concat, options, read_hdf # create (or open) an hdf5 file and opens in append mode import numpy as np @@ -129,25 +129,24 @@ def format_chr_gwas(gwas_file_chri, chrom, study_name, regions_bychr): (z_gwas.loc[ind, "MiddlePosition"]) = (left + right) / 2 return(z_gwas) -def compute_covariance_zscore(init_table_path): +def compute_covariance_zscore(init_file_path): print("## Compute covariance ##") - sum_stat_jost_tab = pd.read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(0, 3)) + sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(0, 3)) trait = [i for i in sum_stat_jost_tab.columns if i[:2]=="z_"] - NSNP_matrix = pd.DataFrame(index=trait, columns=trait) - cov_matrix = pd.DataFrame(index=trait, columns=trait) + NSNP_matrix = DataFrame(index=trait, columns=trait) + cov_matrix = DataFrame(index=trait, columns=trait) cov_matrix.fillna(0, inplace=True) NSNP_matrix.fillna(0, inplace=True) bi = range(0,1751,50) - start = timeit.timeit() n_len = len(bi)-1 for i in range(n_len): binf = bi[i] bsup = bi[(i+1)] - sum_stat_jost_tab = pd.read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(binf, bsup)) + sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(binf, bsup)) print("Regions {0} to {1}\r".format(binf, bsup)) j = 0 @@ -162,10 +161,9 @@ def compute_covariance_zscore(init_table_path): cov_matrix.loc[tr2,tr1] += cc.iloc[:,0].dot(cc.iloc[:,1]) NSNP_matrix.loc[tr2,tr1] += cc.shape[0] j=j+1 - end = timeit.timeit() - print(end-start) + #(cov_matrix/NSNP_matrix).to_csv("Covariance_on_Zscores.csv", sep="\t") - hdf_init = pd.HDFStore(init_table_path) + hdf_init = HDFStore(init_file_path) hdf_init.put("COV", (cov_matrix/NSNP_matrix), format="table", data_columns=True) hdf_init.close() @@ -224,7 +222,7 @@ def create_inittable_file( which_cols.extend(list(pheno_select)) hdf_init.put("PhenoList", pheno_list, format="table", data_columns=True) hdf_init.put("Regions", regions, format="table", data_columns=True) - sum_stat_tab_min_itemsizes = {"snp_ids": 50} + sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Ref_allele":70,"Alt_allele":70} regions_bychr = regions.groupby("chr") @@ -302,6 +300,7 @@ def create_inittable_file( check_if_SNP_unique(z_gwas_chrom) sum_stat_tab = z_gwas_chrom[which_cols] + hdf_init.append( "SumStatTab", sum_stat_tab, @@ -312,4 +311,4 @@ def create_inittable_file( if compute_covariance: print("Compute Covariance from Zscores") - compute_covariance_zscore(hdf_init = init_table_path) + compute_covariance_zscore(init_table_path) diff --git a/jass/models/worktable.py b/jass/models/worktable.py index 885bec15f32df4e07a975b26825f38d850832d42..a249eb9bc8cd3f765d440d7bdda7587fbcb18aa4 100755 --- a/jass/models/worktable.py +++ b/jass/models/worktable.py @@ -193,9 +193,9 @@ def create_worktable_file( ) # Covariance matrix regions = read_hdf(init_file_path, "Regions").index.tolist() - sum_stat_tab_min_itemsizes = {"snp_ids": 50, "Region": 10, "CHR": 5} + sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Region": 10, "CHR": 5,"Ref_allele" : 70, "Alt_allele":70} #['Region', "MiddlePosition", "snp_ids","CHR", "position", "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "signif_status"] - region_sub_table_min_itemsizes = {"Region": 10, "index": 10, "CHR": 5, "snp_ids": 50, "signif_status":20} + region_sub_table_min_itemsizes = {"Region": 10, "index": 10, "CHR": 5, "snp_ids": 80, "signif_status":20,"Ref_allele" : 70, "Alt_allele":70} smart_na_computation = not (remove_nan) module_name, function_name = stat.split(":") diff --git a/requirements.txt b/requirements.txt index d19c39c83c11cdb3997fe01cfd0548cfa30354d8..4d1144be057841f4254e45ba33590528b14d5f21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,11 @@ -connexion == 1.1.10 -python_dateutil == 2.6.0 -setuptools >= 21.0.0 +git+https://github.com/hmenager/connexion.git#egg=connexion[swagger-ui] +aiohttp +python_dateutil +setuptools numpy -pandas >= 0.19.2 -tables >= 3.3.0 +pandas +h5py; python_version < '3.7' +tables scipy matplotlib -celery \ No newline at end of file +celery