diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index b6c2fcc1a58d8c00aad8265a5bee03e67810dd49..e8f5b6b003a00e226ae24fdce8d1356a62c334ad 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022-2023 Blaise Li" __licence__ = "GNU GPLv3" -__version__ = "0.27.1" +__version__ = "0.27.2" from .libcodonusage import ( aa2colour, aa_usage, @@ -21,6 +21,7 @@ from .libcodonusage import ( gene_wide_codon_usage, load_bias_table, load_counts_table, + load_table_with_info_index, make_aa_codon_columns, make_cluster_table, make_centroids_cluster_finder, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 848efc2bee85e8089d53bac09e1100e576c8f442..62c12591763049bc37acf36e60c99c4368ec8c1e 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -362,6 +362,42 @@ def save_counts_table(counts_table, table_path): render_md(f"The table was saved at [{table_path}]({table_path}).") +def load_table_with_info_index(table_path, nb_info_cols, nb_cluster_series=0): + """ + Load a table containing by-amino-acid codon counts or usage biases. + + The table, located at *table_path*, should have tab-separated columns. + It is expected to contain a series of informative columns + preceding the columns containing the codon counts or usage bias values. + + The first *nb_info_cols* columns contain information about gene + identifiers and features extracted from CDS annotation data. + + Then, there can be series of gene-clustering information, where each + column of the series corresponds to clusters defined based on + codon-usage biases among the codons coding an amino-acid. + There are no columns for amino-acids coded by only one codon (M and W). + *nb_cluster_series* specifies the number of such series.
+ + The result is a pandas DataFrame object, where all the above information + is encoded as a MultiIndex, the codon counts or usage biases being in the + remaining columns. + """ + return pd.read_csv( + table_path, + sep="\t", + # *nb_info_cols* starting levels from the initial table, + # plus levels corresponding to clustering information. + # For instance, with 2 clustering methods (nb_cluster_series=2): + # * `cluster_{aa}_kmeans` for each amino-acid + # having more than one codon + # * `cluster_{aa}_full_bias` for each amino-acid + # having more than one codon + index_col=list( + range(nb_info_cols + nb_cluster_series * (len(aa2colour) - 2))), + header=[0, 1]) + + def filter_on_idx_levels(counts_table, filter_dict): """ Filter a table *counts_table* based on values of certain index levels. @@ -1034,19 +1070,8 @@ def load_bias_table(table_path, nb_info_cols=9, nb_cluster_series=2): is encoded as a MultiIndex, the codon usage biases being in the remaining columns. """ - return pd.read_csv( - table_path, - sep="\t", - # *nb_info_cols* starting levels from the initial table, - # plus levels corresponging to clustering information. - # By default, from 2 methods (nb_cluster_series=2): - # * `cluster_{aa}_kmeans` for each amino-acid - # having more than one codon - # * `cluster_{aa}_full_bias` for each amino-acid - # having more than one codon - index_col=list( - range(nb_info_cols + nb_cluster_series * (len(aa2colour) - 2))), - header=[0, 1]) + return load_table_with_info_index( + table_path, nb_info_cols, nb_cluster_series) def star2stop(text):