Skip to content
Snippets Groups Projects
Commit 2f2bba77 authored by Blaise Li's avatar Blaise Li
Browse files

Also compute global RSCU.

Hopefully, this can be used to fill NaNs for PCA.
parent d83084c7
No related branches found
No related tags found
No related merge requests found
__copyright__ = "Copyright (C) 2022-2023 Blaise Li"
__licence__ = "GNU GPLv3"
__version__ = "0.27.4"
__version__ = "0.27.5"
from .libcodonusage import (
aa2colour,
aa_usage,
......
......@@ -531,7 +531,7 @@ def check_aa_codon_columns(table):
Check that the columns of *table* correspond to (aa, codon) pairs.
"""
msg = "Codon proportions table should have two levels: 'aa' and 'codon'"
if table.columns.nlevels !=2:
if table.columns.nlevels != 2:
raise ValueError(msg)
if table.columns.names[0] != "aa":
raise ValueError(msg)
......@@ -549,6 +549,9 @@ def compute_rscu(codon_proportions_by_aa):
same amino-acid. The column index should be a pandas MultiIndex
where the first level is the amino-acid name, and the second level
the codon.
Return a pair where the first element is the rscu table, and
the second is the degeneracy table.
"""
check_aa_codon_columns(codon_proportions_by_aa)
degeneracy = pd.Series(
......@@ -578,7 +581,9 @@ def compute_rscu(codon_proportions_by_aa):
itemgetter(0),
codon_proportions_by_aa.columns)).items()])),
index=codon_proportions_by_aa.columns)
return codon_proportions_by_aa.mul(degeneracy)
return (
codon_proportions_by_aa.mul(degeneracy),
degeneracy)
def by_aa_codon_usage(
......@@ -640,13 +645,14 @@ This amounts to computing codon counts relative to the average codon counts
for that amino-acid.
(This corresponds to R3 in {SUZUKI_LINK})
""")
rscu = compute_rscu(codon_proportions)
(rscu, degeneracy) = compute_rscu(codon_proportions)
render_md(f"""
We will also compute R4 ({SUZUKI_LINK}) by computing codon counts relative
to the maxium across synonymous codons of the counts for that amino-acid.
""")
max_counts = codon_counts.T.groupby("aa").max().T
r4_table = codon_counts.div(
codon_counts.T.groupby("aa").max().T)
max_counts)
if ref_filter_dict is None:
counts_for_global = codon_counts
else:
......@@ -695,7 +701,8 @@ across genes) so that they are more comparable between codons.
"proportions": codon_proportions,
"rscu": rscu,
"r4_table": r4_table,
"global_proportions": global_proportions}
"global_proportions": global_proportions,
"global_rscu": global_proportions.mul(degeneracy)}
return standardized_codon_usage_biases
......@@ -944,7 +951,7 @@ def centroid_usage(codon_counts, all_nan_cols):
global_usage = codon_counts.sum(axis=0)
global_summed_by_aa = global_usage.groupby(level=0).sum()
global_proportions_by_aa = global_usage.div(
global_summed_by_aa)
global_summed_by_aa)
codon_proportions_by_aa = codon_counts.div(summed_by_aa)
codon_usage_biases_by_aa = codon_proportions_by_aa.sub(
global_proportions_by_aa)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment