diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py
index 5b810c16860d88d66fdea65006cb18b59ca456c8..3107f30870c7f07d494f2065049442353da363ab 100644
--- a/libcodonusage/__init__.py
+++ b/libcodonusage/__init__.py
@@ -1,6 +1,6 @@
 __copyright__ = "Copyright (C) 2022 Blaise Li"
 __licence__ = "GNU GPLv3"
-__version__ = "0.11"
+__version__ = "0.12"
 from .libcodonusage import (
     aa2colour,
     aa_usage,
@@ -8,6 +8,7 @@ from .libcodonusage import (
     codon2aa,
     columns_by_aa,
     detect_fishy_genes,
+    exclude_all_nan_cols,
     gene_wide_codon_usage,
     load_bias_table,
     load_counts_table,
diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 761e24e7093aa921f97341b6964c88d7a126c85d..edc2ecd9757d2a0701d8d92432dfa12bd256e097 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -545,6 +545,39 @@ across genes) so that they are more comparable between amino-acids.
     return standardized_aa_usage_biases
 
 
+def exclude_all_nan_cols(standardized_usage_biases):
+    """
+    Detect columns in *standardized_usage_biases* that contain only NaNs
+    and remove them from the table.
+
+    An explanatory note is rendered with `render_md`; when all-NaN columns
+    are found, their labels are also shown using `display`.
+    NaNs remaining in the columns that are kept are replaced with zeros.
+
+    Return a pair: the filtered table,
+    and the labels of the excluded columns.
+    """
+    render_md("""
+Standardization may result in division by zero for usage biases
+that have a zero standard deviation.
+This is expected to be the case for "by amino-acid" usage biases
+for codons corresponding to amino-acids having only one codon:
+methionine (M) and tryptophan (W).
+""")
+    # Select the labels of the columns in which every value is NaN.
+    all_nan_cols = standardized_usage_biases.columns[
+        standardized_usage_biases.isna().all()]
+    if len(all_nan_cols):
+        render_md("The following columns contain only NaNs:")
+        display(all_nan_cols)
+        render_md("This likely resulted from a division by zero.")
+        render_md("These columns will be excluded")
+    # NaNs left in the retained columns are set to zero in the result.
+    return (
+        standardized_usage_biases.drop(columns=all_nan_cols).fillna(0),
+        all_nan_cols)
+
+
 def load_bias_table(table_path, nb_info_cols=9, nb_cluster_series=2):
     """
     Load a table containing by-amino-acid codon usage biases.