diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index bcdaab8a7bd07f11e692a11570d5585337cc3302..2065ff6cc9cf98cba92432da80563236b974184b 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022-2023 Blaise Li" __licence__ = "GNU GPLv3" -__version__ = "0.27.4" +__version__ = "0.27.5" from .libcodonusage import ( aa2colour, aa_usage, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 2408e4a5282429240f3e83f776c118729e30bdcc..2be726c0799f6e20b0303306f2a8f549ce83bd85 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -531,7 +531,7 @@ def check_aa_codon_columns(table): Check that the columns of *table* correspond to (aa, codon) pairs. """ msg = "Codon proportions table should have two levels: 'aa' and 'codon'" - if table.columns.nlevels !=2: + if table.columns.nlevels != 2: raise ValueError(msg) if table.columns.names[0] != "aa": raise ValueError(msg) @@ -549,6 +549,9 @@ def compute_rscu(codon_proportions_by_aa): same amino-acid. The column index should be a pandas MultiIndex where the first level is the amino-acid name, and the second level the codon. + + Return a pair where the first element is the rscu table, and + the second is the degeneracy table. """ check_aa_codon_columns(codon_proportions_by_aa) degeneracy = pd.Series( @@ -578,7 +581,9 @@ def compute_rscu(codon_proportions_by_aa): itemgetter(0), codon_proportions_by_aa.columns)).items()])), index=codon_proportions_by_aa.columns) - return codon_proportions_by_aa.mul(degeneracy) + return ( + codon_proportions_by_aa.mul(degeneracy), + degeneracy) def by_aa_codon_usage( @@ -640,13 +645,14 @@ This amounts to computing codon counts relative to the average codon counts for that amino-acid. (This corresponds to R3 in {SUZUKI_LINK}) """) - rscu = compute_rscu(codon_proportions) + (rscu, degeneracy) = compute_rscu(codon_proportions) render_md(f""" We will also compute R4 ({SUZUKI_LINK}) by computing codon counts relative to the maxium across synonymous codons of the counts for that amino-acid. """) + max_counts = codon_counts.T.groupby("aa").max().T r4_table = codon_counts.div( - codon_counts.T.groupby("aa").max().T) + max_counts) if ref_filter_dict is None: counts_for_global = codon_counts else: @@ -695,7 +701,8 @@ across genes) so that they are more comparable between codons. "proportions": codon_proportions, "rscu": rscu, "r4_table": r4_table, - "global_proportions": global_proportions} + "global_proportions": global_proportions, + "global_rscu": global_proportions.mul(degeneracy)} return standardized_codon_usage_biases @@ -944,7 +951,7 @@ def centroid_usage(codon_counts, all_nan_cols): global_usage = codon_counts.sum(axis=0) global_summed_by_aa = global_usage.groupby(level=0).sum() global_proportions_by_aa = global_usage.div( - global_summed_by_aa) + global_summed_by_aa) codon_proportions_by_aa = codon_counts.div(summed_by_aa) codon_usage_biases_by_aa = codon_proportions_by_aa.sub( global_proportions_by_aa)