diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index cd647f677639d3445593f1a7b2c94bb9d6aff8ba..33f7a903f73a78220944c0f9b53a216606df2c32 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022 Blaise Li" __licence__ = "GNU GPLv3" -__version__ = 0.6 +__version__ = 0.7 from .libcodonusage import ( aa2colour, codon2aa, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index e8c8eea123933142d4fc98b7d9b6246a4a174804..fe0429c07282f85dc8ab0fe7d1d6cd634d5263bb 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -86,7 +86,8 @@ with Path(bgraphs.colorschemes._scheme_dir).joinpath( aa2colour = {**colscheme["colors"], "*": '#000000'} -def load_counts_table(table_path, index_col="old_locus_tag"): +def load_counts_table( + table_path, index_col="old_locus_tag", index_unique=True): """ Load a table or pre-computed codon counts at *table_path*. @@ -94,9 +95,14 @@ def load_counts_table(table_path, index_col="old_locus_tag"): Besides the columns containing the counts for each codon, there are other columns containing various pieces of information regarding those genes. + + If *index_unique* is True, a ValueError will be raised if + some elements in the column *index_col* are not unique. 
""" render_md(f"Loading data from [{table_path}]({table_path})...\n") codon_counts = pd.read_table(table_path, index_col=index_col) + if index_unique and not(codon_counts.index.is_unique): + raise ValueError(f"Index {index_col} contains repeated values.\n") nb_genes = len(codon_counts) render_md( f""" @@ -144,9 +150,12 @@ def compute_criteria(codon_counts): display(criteria.agg(sum)) render_md("Upset plot of the non-empty categories:\n\n") fig = plt.figure() - UpSet( - from_indicators(*[criteria.columns], data=criteria), - show_counts=True).plot(fig=fig) + try: + UpSet( + from_indicators(*[criteria.columns], data=criteria), + show_counts=True).plot(fig=fig) + except AttributeError: + pass display(fig) plt.close(fig) gene_sets = { @@ -261,13 +270,16 @@ might be excluded. return criteria -def make_counts_only(counts_table): +def make_counts_only( + counts_table, + seq_id_kw="locus_tag", alt_tag_kw="old_locus_tag"): """ Integrate "informative" columns of *counts_table* into the index. """ # To ensure a stable order: ref_info_cols = [ - "old_locus_tag", "locus_tag", "length", + alt_tag_kw, seq_id_kw, + "start", "end", "length", "start_codon", "expected_start_aa", "first_stop", "nb_stops", "start_upstream", "end_downstream"] # To ensure no info columns are lost: