def group_codons_by_class(
        codon_counts, group_name, dict_classes,
        mode="max", keep_only_groups=False):
    """
    Group codons given specific classes in *codon_counts* table.

    *codon_counts* is expected to have its columns indexed by
    (aa, codon) pairs.
    *group_name* contains the name of the grouping, and plays the role
    of aa names in the original codon counts table.
    *dict_classes* contains the different classes under this grouping
    as keys and the associated list of (aa, codon) pairs as values.
    A given (aa, codon) pair may belong to several classes:
    it then contributes to each of them.
    *mode* defines the way grouping is computed. It should be the name of
    a method of the `GroupBy` object, like "sum" or "max".
    If *mode* is "max" (default), the maximum value of counts of codons
    belonging to the same class is used for the grouped class.
    If *mode* is "sum", the sum of counts values for all codons
    belonging to the same class is used for the grouped class.
    *keep_only_groups* is a boolean set to True if you want to filter out
    other codons than the ones specified in dict_classes.
    If set to False (default), the original codon_counts table
    is returned with additional columns for the grouped_classes.

    The grouped columns are labelled (*group_name*, class_name).
    *codon_counts* itself is not modified.
    """
    # Parallel lists are used instead of a dict keyed by (aa, codon):
    # a dict would silently keep only the last class of a codon
    # listed in several classes.
    selected_cols = []
    renamed_cols = []
    for (class_name, members) in dict_classes.items():
        for (aa, codon) in members:
            selected_cols.append((aa, codon))
            renamed_cols.append((class_name, group_name, codon))
    codon_counts_pre_group = codon_counts[selected_cols]
    # We add a level to the column names,
    # "codon", becoming "sub_codon", will disappear
    # after the groupby operation
    # group_name will be used as "aa", class_name as "codon".
    codon_counts_pre_group.columns = pd.MultiIndex.from_tuples(
        renamed_cols, names=["codon", "aa", "sub_codon"])
    # The levels to group on live on the columns, whereas groupby
    # works on the row index by default: the table is transposed before
    # grouping, and the result is transposed back.
    # getattr accesses the method whose name is in argument *mode*
    # and this method is called to get the result of the groupby operation.
    codon_counts_grouped = getattr(
        codon_counts_pre_group.T.groupby(level=["aa", "codon"]),
        mode)().T
    if keep_only_groups:
        return codon_counts_grouped
    return pd.concat([codon_counts, codon_counts_grouped], axis=1)