Skip to content
Snippets Groups Projects
Commit cb177e00 authored by Blaise Li's avatar Blaise Li
Browse files

Update group_codons_by_class based on groupby.

parent 89ba6591
No related branches found
No related tags found
No related merge requests found
__copyright__ = "Copyright (C) 2022-2023 Blaise Li, Marie Anselmet" __copyright__ = "Copyright (C) 2022-2023 Blaise Li, Marie Anselmet"
__licence__ = "GNU GPLv3" __licence__ = "GNU GPLv3"
__version__ = "0.28.2" __version__ = "0.28.3"
from .libcodonusage import ( from .libcodonusage import (
aa2colour, aa2colour,
aa_usage, aa_usage,
......
...@@ -416,7 +416,7 @@ def split_info_index(table, keep_index_cols): ...@@ -416,7 +416,7 @@ def split_info_index(table, keep_index_cols):
out_table = table.copy() out_table = table.copy()
out_table.index = out_table.index.droplevel(drop_info_cols) out_table.index = out_table.index.droplevel(drop_info_cols)
# To ensure indices have their levels in the same order # To ensure indices have their levels in the same order
# in out_table and in infoinfo_table: # in out_table and in info_table:
out_table = out_table.reset_index().set_index(keep_index_cols) out_table = out_table.reset_index().set_index(keep_index_cols)
return (out_table, info_table) return (out_table, info_table)
...@@ -477,7 +477,7 @@ def max_codon_counts(row, codons): ...@@ -477,7 +477,7 @@ def max_codon_counts(row, codons):
return max(counts_codons) return max(counts_codons)
def group_codons_by_class( def group_codons_by_class_old(
codon_counts, group_name, dict_classes, codon_counts, group_name, dict_classes,
mode="max", keep_only_groups=False): mode="max", keep_only_groups=False):
""" """
...@@ -517,6 +517,48 @@ def group_codons_by_class( ...@@ -517,6 +517,48 @@ def group_codons_by_class(
return codon_counts return codon_counts
def group_codons_by_class(
        codon_counts, group_name, dict_classes,
        mode="max", keep_only_groups=False):
    """
    Group codons given specific classes in *codon_counts* table.

    *codon_counts* is expected to have its columns indexed by
    (aa, codon) pairs: the keys used to select columns are the
    (aa, codon) tuples listed in *dict_classes*.

    *group_name* contains the name of the grouping, and plays the role
    of aa names in the original codon counts table.

    *dict_classes* contains the different classes under this grouping
    as keys and the associated list of (aa, codon) pairs as values.
    A given (aa, codon) pair can only belong to one class: if it is
    listed in several classes, only the last one is taken into account.

    *mode* defines the way grouping is computed. It should be the name of
    a method of the `GroupBy` object, like "sum" or "max".
    If mode is "max" (default), the maximum value of counts of codons
    belonging to the same class is used for the grouped class.
    If *mode* is "sum", the sum of counts values for all codons
    belonging to the same class is used for the grouped class.

    *keep_only_groups* is a boolean set to True if you want to filter out
    other codons than the ones specified in dict_classes.
    If set to False (default), the original codon_counts table
    is returned with additional columns for the grouped_classes.

    The grouped columns are labelled (*group_name*, class name),
    and are ordered by the groupby operation (sorted by label).
    """
    # Maps each original (aa, codon) column to a 3-level label.
    col_renames = {
        (aa, codon): (class_name, group_name, codon)
        for (class_name, class_codons) in dict_classes.items()
        for (aa, codon) in class_codons}
    # Selecting with a list of labels returns a new table,
    # so relabelling its columns leaves *codon_counts* untouched.
    codon_counts_pre_group = codon_counts[list(col_renames)]
    # We add a level to the column names,
    # "codon", becoming "sub_codon", will disappear
    # after the groupby operation
    # group_name will be used as "aa", class_name as "codon".
    codon_counts_pre_group.columns = pd.MultiIndex.from_tuples(
        list(col_renames.values()), names=["codon", "aa", "sub_codon"])
    # The levels to group by are *column* levels, whereas groupby
    # works on the row index by default: we group the transposed table
    # and transpose the result back
    # (the axis=1 option of groupby is deprecated in recent pandas).
    grouper = codon_counts_pre_group.T.groupby(level=["aa", "codon"])
    # getattr accesses the method whose name is in argument *mode*
    # and this method is called to get the result of the groupby operation.
    codon_counts_grouped = getattr(grouper, mode)().T
    if keep_only_groups:
        return codon_counts_grouped
    return pd.concat([codon_counts, codon_counts_grouped], axis=1)
def gene_wide_codon_usage( def gene_wide_codon_usage(
codon_counts, codon_counts,
verbose=False, return_more=False, ref_filter_dict=None, verbose=False, return_more=False, ref_filter_dict=None,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment