From fa1283b6cc284d9290619d2e0eae86516a69f0ca Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Wed, 9 Mar 2022 15:08:39 +0100 Subject: [PATCH] Handle new input and border cases. --- libcodonusage/__init__.py | 2 +- libcodonusage/libcodonusage.py | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index cd647f6..33f7a90 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022 Blaise Li" __licence__ = "GNU GPLv3" -__version__ = 0.6 +__version__ = 0.7 from .libcodonusage import ( aa2colour, codon2aa, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index e8c8eea..fe0429c 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -86,7 +86,8 @@ with Path(bgraphs.colorschemes._scheme_dir).joinpath( aa2colour = {**colscheme["colors"], "*": '#000000'} -def load_counts_table(table_path, index_col="old_locus_tag"): +def load_counts_table( + table_path, index_col="old_locus_tag", index_unique=True): """ Load a table or pre-computed codon counts at *table_path*. @@ -94,9 +95,14 @@ def load_counts_table(table_path, index_col="old_locus_tag"): Besides the columns containing the counts for each codon, there are other columns containing various pieces of information regarding those genes. + + If *index_unique* is True, a ValueError will be raised if + some elements in the column *index_col* are not unique. 
""" render_md(f"Loading data from [{table_path}]({table_path})...\n") codon_counts = pd.read_table(table_path, index_col=index_col) + if index_unique and not(codon_counts.index.is_unique): + raise ValueError(f"Index {index_col} contains repeated values.\n") nb_genes = len(codon_counts) render_md( f""" @@ -144,9 +150,12 @@ def compute_criteria(codon_counts): display(criteria.agg(sum)) render_md("Upset plot of the non-empty categories:\n\n") fig = plt.figure() - UpSet( - from_indicators(*[criteria.columns], data=criteria), - show_counts=True).plot(fig=fig) + try: + UpSet( + from_indicators(*[criteria.columns], data=criteria), + show_counts=True).plot(fig=fig) + except AttributeError: + pass display(fig) plt.close(fig) gene_sets = { @@ -261,13 +270,16 @@ might be excluded. return criteria -def make_counts_only(counts_table): +def make_counts_only( + counts_table, + seq_id_kw="locus_tag", alt_tag_kw="old_locus_tag"): """ Integrate "informative" columns of *counts_table* into the index. """ # To ensure a stable order: ref_info_cols = [ - "old_locus_tag", "locus_tag", "length", + alt_tag_kw, seq_id_kw, + "start", "end", "length", "start_codon", "expected_start_aa", "first_stop", "nb_stops", "start_upstream", "end_downstream"] # To ensure no info columns are lost: -- GitLab