From 28b03b1e317e9a73d0be04c8f6b526009cc9ab08 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Wed, 8 Nov 2023 16:34:01 +0100 Subject: [PATCH] Change default output of group_codons_by_class. The codons belonging to the classes are not present in the output. --- libcodonusage/__init__.py | 2 +- libcodonusage/libcodonusage.py | 34 ++++++++++++++++++---------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index dc6f97f..998abc6 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022-2023 Blaise Li, Marie Anselmet" __licence__ = "GNU GPLv3" -__version__ = "0.28.5" +__version__ = "0.29.0" from .libcodonusage import ( aa2colour, aa_usage, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 69aca04..856af0e 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. """Functions used in Jupyter notebooks.""" -from itertools import combinations +from itertools import chain, combinations import json from operator import attrgetter, itemgetter from pathlib import Path @@ -442,19 +442,13 @@ SUZUKI_DOI = "10.1016/j.febslet.2005.10.032" SUZUKI_LINK = f"[Suzuki et al (2005)](https://doi.org/{SUZUKI_DOI})" -def remove_codons_old(codon_counts, codon_list): - """ - Filter out codons in a table *codon_counts* based on codons - present in the list *codon_list* (like stop codons). - """ - codon_counts.drop(columns=codon_list, inplace=True) - return codon_counts - - def remove_codons(codon_counts, codon_list): """ Filter out codons in a table *codon_counts* based on codons present in the list *codon_list* (like stop codons). + When the columns of *codon_counts* are a MultiIndex with a first + level corresponding to the amino-acid and the second to the codon, + the codons should be specified as (aa, codon) tuples. """ return codon_counts.drop(columns=codon_list) @@ -486,14 +480,14 @@ def max_codon_counts(row, codons): def group_codons_by_class( - codon_counts, group_name, dict_classes, - mode="max", keep_only_groups=False): + codon_counts, group_name, codon_classes, + mode="max", keep_only_groups=False, replace_groups=True): """ Group codons given specific classes in *codon_counts* table. *group_name* contains the name of the grouping, and plays the role of aa names in the original codon counts table. - *dict_classes* contains the different classes under this grouping + *codon_classes* contains the different classes under this grouping as keys and the associated list of codons as values. *mode* defines the way grouping is computed. It should be the name of a method of the `GroupBy` object, like "sum" or "max". @@ -502,14 +496,18 @@ def group_codons_by_class( If *mode* is "sum", the sum of counts values for all codons belonging to the same class is used for the grouped class. *keep_only_groups* is a boolean set to True if you want to filter out - other codons than the ones specified in dict_classes. + other codons than the ones specified in codon_classes. If set to False (default), the original codon_counts table is returned with additional columns for the grouped_classes. + If the boolean *replace_groups* is set to True (default), the full + count table will be returned, except the columns used to compute the + grouped values. This option is ignored if *keep_only_groups* has been + set to True. """ col_renames = { (aa, codon): (class_name, group_name, codon) - for class_name in dict_classes - for (aa, codon) in dict_classes[class_name]} + for class_name in codon_classes + for (aa, codon) in codon_classes[class_name]} codon_counts_pre_group = codon_counts[col_renames.keys()] # We add a level to the column names, # "codon", becoming "sub_codon", will disappear @@ -524,6 +522,10 @@ def group_codons_by_class( mode)() if keep_only_groups: return codon_counts_grouped + if replace_groups: + return remove_codons( + pd.concat([codon_counts, codon_counts_grouped], axis=1), + chain.from_iterable(codon_classes.values())) return pd.concat([codon_counts, codon_counts_grouped], axis=1) -- GitLab