diff --git a/jass/models/stats.py b/jass/models/stats.py
index 7d3ed4c5f2116566200b1c5eb658b7525bc5c324..f069a6e6516724233c322469930a757340648f06 100755
--- a/jass/models/stats.py
+++ b/jass/models/stats.py
@@ -48,8 +48,7 @@ def make_stat_computer_nan(cov, stat_func):
         if pattern_code in invcov_bypattern:
             invcov = invcov_bypattern[pattern_code]
         else:
-
-            z_na_bool = [bool(int(val)) for val in z_na_pattern]
+            z_na_bool = ~z.iloc[0,].isnull()
             mini_cov = cov.loc[z_na_bool, z_na_bool]
             invcov = np.linalg.inv(mini_cov)
             invcov_bypattern[pattern_code] = invcov
diff --git a/jass/models/worktable.py b/jass/models/worktable.py
index a9681d222e1c818956b3ed743914b77843980b45..a4a6c52358e53f6ef797f97c3f0dc735a21c5f17 100755
--- a/jass/models/worktable.py
+++ b/jass/models/worktable.py
@@ -10,7 +10,7 @@ import importlib
 from typing import List
 
 #from dask.dataframe import read_hdf as dask_read_hdf
-from pandas import HDFStore, DataFrame, concat, read_hdf
+from pandas import HDFStore, DataFrame, concat, read_hdf, Series, Index
 # create (or open) an hdf5 file and opens in append mode
 import numpy as np
 import scipy.stats as spst
@@ -91,19 +91,21 @@ def create_worktable_file(phenotype_ids: List[str], init_file_path: str, project
         sum_stat_jost_tab['PVALJOST'] = stat_compute(sum_stat_jost_tab[phenotype_ids])#.apply(stat_compute, axis=1)
     else:
         # Sort SumStatTab by missing patterns
-        patterns_missing = pd.Series(np.dot((1- sum_stat_jost_tab[phenotype_ids].isnull()), 10**np.arange((N_pheno-1), -1, -1)))
+        patterns_missing = Series(np.dot((1- sum_stat_jost_tab[phenotype_ids].isnull()), 10**np.arange((N_pheno-1), -1, -1)))
         pattern_frequency = patterns_missing.value_counts() / len(patterns_missing)
         print("Frequency of missing patterns :")
         print(pattern_frequency)
         frequent_pattern = pattern_frequency.index[pattern_frequency > 0.05].tolist()
         # index on missing patterns:
-        sum_stat_jost_tab.index = pd.Index(patterns_missing)
+        sum_stat_jost_tab.index = Index(patterns_missing)
         # Keep_only frequent_pattern
         sum_stat_jost_tab = sum_stat_jost_tab.loc[frequent_pattern]
 
         # Apply the statistic computation by missing patterns
         for pattern in frequent_pattern:
-            sum_stat_jost_tab.loc[frequent_pattern, "PVALJOST"] = stat_compute(sum_stat_jost_tab.loc[frequent_pattern, phenotype_ids])
+            # Select rows by their own pattern (not the whole frequent_pattern
+            # list) so each missing-data group gets its matching statistic.
+            sum_stat_jost_tab.loc[pattern, "PVALJOST"] = stat_compute(sum_stat_jost_tab.loc[pattern, phenotype_ids])
 
     sum_stat_jost_tab.sort_values(by=["Region", "CHR"], inplace=True)
diff --git a/jass/test/test_worktable.py b/jass/test/test_worktable.py
index f54a1d81064bd42b3cd107131e10cc40fd3cb94f..080a5d46b8afb0a854e9058b02215a74d38e6491 100644
--- a/jass/test/test_worktable.py
+++ b/jass/test/test_worktable.py
@@ -22,7 +22,9 @@ class TestWorkTable(object):
         cls.expected_hdf_path = cls.get_file_path_fn('worktable.hdf5')
         create_worktable_file(cls.phenotypes_sel, init_file_path, cls.result_hdf_path, cls.remove_nan)
         cls.expected_sumstatjosttab = read_hdf(cls.expected_hdf_path, 'SumStatJostTab')
+        cls.expected_sumstatjosttab = cls.expected_sumstatjosttab.reset_index(drop=True)
         cls.result_sumstatjosttab = read_hdf(cls.result_hdf_path, 'SumStatJostTab')
+        cls.result_sumstatjosttab = cls.result_sumstatjosttab.reset_index(drop=True)
         cls.expected_regionsubtable = read_hdf(cls.expected_hdf_path, 'RegionSubTable')
         cls.result_regionsubtable = read_hdf(cls.result_hdf_path, 'RegionSubTable')
         cls.expected_summarytable = read_hdf(cls.expected_hdf_path, 'summaryTable')