From faf04a848c03ff7c3a11d34ef8b634fc2d68370e Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Thu, 14 Sep 2023 13:18:58 +0200
Subject: [PATCH] Add checks on column formats.

---
Reviewer note: check_aa_codon_columns() now inspects its *table* argument.
The body previously read the name codon_proportions_by_aa, which is not a
parameter of the function, so every call would fail with NameError (or check
an unrelated global). Line breaks inside wrapped docstring/markdown context
lines were reconstructed from the hunk line counts -- TODO confirm against
libcodonusage.py before applying.

 libcodonusage/libcodonusage.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 62c1259..4c2e6a6 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -502,6 +502,19 @@ across genes) so that they are more comparable between codons.
     return standardized_codon_usage_biases
 
 
+def check_aa_codon_columns(table):
+    """
+    Check that the columns of *table* correspond to (aa, codon) pairs.
+    """
+    msg = "Codon proportions table should have two levels: 'aa' and 'codon'"
+    if table.columns.nlevels != 2:
+        raise ValueError(msg)
+    if table.columns.names[0] != "aa":
+        raise ValueError(msg)
+    if table.columns.names[1] != "codon":
+        raise ValueError(msg)
+
+
 def compute_rscu(codon_proportions_by_aa):
     """
     Compute Relative Syninymous Codon Usage from proportions in genes.
@@ -513,6 +526,7 @@ def compute_rscu(codon_proportions_by_aa):
     where the first level is the amino-acid name, and the second level the
     codon.
     """
+    check_aa_codon_columns(codon_proportions_by_aa)
     degeneracy = pd.Series(
         # concat "flattens" the list of iterables given as arguments
         # (list of tuples of repeated degeneracy values)
@@ -558,6 +572,7 @@ def by_aa_codon_usage(
     restricted to the genes where the *index_level* has the *index_value* for
     all those pairs.
     """
+    check_aa_codon_columns(codon_counts)
     render_md(f"""
 We will compute codon usage "by amino-acid", by looking at the proportion of
 codons for each amino-acid within a gene's CDS.
@@ -667,6 +682,7 @@ def aa_usage(
     restricted to the genes where the *index_level* has the *index_value* for
     all those pairs.
     """
+    check_aa_codon_columns(codon_counts)
     render_md("""
 We will compute amino-acid usage, by looking at the proportions of
 amino-acids within a gene's CDS.
@@ -892,6 +908,7 @@ def centroid_usage(codon_counts, all_nan_cols):
     For each amino-acid, there is one centroid per codon, where the proportion
     for this codon is set to 1.0, and 0.0 for the other codons.
     """
+    check_aa_codon_columns(codon_counts)
     summed_by_aa = codon_counts.groupby(level=0, axis=1).sum()
     global_usage = codon_counts.sum(axis=0)
     global_summed_by_aa = global_usage.groupby(level=0).sum()
-- 
GitLab