From 3b410576d246121d22ee3266efffd132b1be47f4 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Mon, 14 Mar 2022 17:42:32 +0100
Subject: [PATCH] Function to exclude columns with only NaNs.

---
 libcodonusage/__init__.py      |  3 ++-
 libcodonusage/libcodonusage.py | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py
index 5b810c1..3107f30 100644
--- a/libcodonusage/__init__.py
+++ b/libcodonusage/__init__.py
@@ -1,6 +1,6 @@
 __copyright__ = "Copyright (C) 2022 Blaise Li"
 __licence__ = "GNU GPLv3"
-__version__ = "0.11"
+__version__ = "0.12"
 from .libcodonusage import (
     aa2colour,
     aa_usage,
@@ -8,6 +8,7 @@ from .libcodonusage import (
     codon2aa,
     columns_by_aa,
     detect_fishy_genes,
+    exclude_all_nan_cols,
     gene_wide_codon_usage,
     load_bias_table,
     load_counts_table,
diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 761e24e..edc2ecd 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -545,6 +545,30 @@ across genes) so that they are more comparable between amino-acids.
     return standardized_aa_usage_biases
 
 
+def exclude_all_nan_cols(standardized_usage_biases):
+    """
+    Drop columns of *standardized_usage_biases* that contain only NaNs, then
+    replace remaining NaNs with 0; return (cleaned table, dropped columns).
+    """
+    render_md("""
+Standardization may result in division by zero for usage biases
+that have a zero standard deviation.
+This is expected to be the case for "by amino-acid" usage biases
+for codons corresponding to amino-acids having only one codon:
+methionine (M) and tryptophan (W).
+""")
+    all_nan_cols = standardized_usage_biases.columns[
+        standardized_usage_biases.isna().all()]
+    if len(all_nan_cols):
+        render_md("The following columns contain only NaNs:")
+        display(all_nan_cols)
+        render_md("This likely resulted from a division by zero.")
+        render_md("These columns will be excluded")
+    return (
+        standardized_usage_biases.drop(columns=all_nan_cols).fillna(0),
+        all_nan_cols)
+
+
 def load_bias_table(table_path, nb_info_cols=9, nb_cluster_series=2):
     """
     Load a table containing by-amino-acid codon usage biases.
-- 
GitLab