def split_info_index(table, keep_index_cols):
    """
    Split table *table* into info and data.

    Info is supposed to be contained in the index of DataFrame *table*,
    data in its columns.

    *keep_index_cols* lists the names of the index levels that should
    remain in the index of the returned tables. A single level name given
    as a string is treated as a one-element list, and any other iterable
    of names (e.g. a tuple) is converted to a list.

    Return a pair of tables ``(out_table, info_table)``:

    * *out_table* contains the data columns of *table*;
    * *info_table* contains, as columns, the index levels of *table*
      that are not listed in *keep_index_cols*.

    In both tables, the index contains only the levels listed in
    *keep_index_cols*, in the order given there.
    *table* itself is not modified.
    """
    # A bare string would otherwise be unpacked character by character
    # and used for substring tests below; set_index also needs a real
    # list (it treats a tuple as one single key).
    if isinstance(keep_index_cols, str):
        keep_index_cols = [keep_index_cols]
    else:
        keep_index_cols = list(keep_index_cols)
    drop_info_cols = [
        colname for colname in table.index.names
        if colname not in keep_index_cols]
    info_table = table.reset_index()[
        [*keep_index_cols, *drop_info_cols]].set_index(keep_index_cols)
    # DataFrame.droplevel returns a new object, so the index levels
    # of the input are not lost by side effect.
    # The reset_index / set_index round trip ensures indices have their
    # levels in the same order in out_table and in info_table.
    out_table = (
        table.droplevel(drop_info_cols)
        .reset_index()
        .set_index(keep_index_cols))
    return (out_table, info_table)