diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index 7ea81e160a1c1a2e6eeeaf43aea9cdc034f7e73e..b2de77858389f3505a41be49b8a9dbd90543a154 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -1,6 +1,6 @@ __copyright__ = "Copyright (C) 2022 Blaise Li" __licence__ = "GNU GPLv3" -__version__ = "0.20" +__version__ = "0.21" from .libcodonusage import ( aa2colour, aa_usage, @@ -26,6 +26,7 @@ from .libcodonusage import ( save_counts_table, sort_counts_by_aa, star2stop, + to_long_form, violin_usage, violin_usage_vertical, violin_usage_by_clusters, diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index fbf886ede2f425e22054723235ba0cf187eeef92..252ea02cbd8c3847c891d0eec08017e0fe91895c 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -16,7 +16,7 @@ """Functions used in Jupyter notebooks.""" from itertools import combinations import json -from operator import itemgetter +from operator import attrgetter, itemgetter from pathlib import Path # python3 -m pip install cytoolz from cytoolz import concat, groupby, unique @@ -705,8 +705,8 @@ def compare_clusterings( *clust1_template* and *clust2_template*. """ crosstabs = {} - for aa in aa2colour.keys(): - if aa == "M" or aa == "W": + for aa in aa2colour.keys(): # pylint: disable=C0103 + if aa in {"M", "W"}: continue clust1_level = clust1_template.format(aa=aa) clust2_level = clust2_template.format(aa=aa) @@ -1081,7 +1081,7 @@ def boxplot_usage(usage_table, ylabel, whiskers="1.5 IQR"): axis.set_ylabel(ylabel) -def to_long_form(usage_table, ylabel): +def to_long_form(usage_table, ylabel, others=None): """ Transform data in *usage_table* into "long form". 
@@ -1096,17 +1096,50 @@ def to_long_form(usage_table, ylabel): level=list(range(col_nb_levels))).to_frame().reset_index( level=list(range( row_nb_levels, row_nb_levels + col_nb_levels))) + if others is None: + others = [] if col_nb_levels == 1: long_form.columns = ["aa", ylabel] + others = [other for other in others if other != "aa"] elif col_nb_levels == 2: long_form.columns = ["aa", "codon", ylabel] + others = [other for other in others if other not in {"aa", "codon"}] else: raise NotImplementedError( "Don't know how to deal with column headers " "with more than 2 levels ('aa' and 'codon')\n") - return long_form.reset_index(level=[ + long_form = long_form.reset_index(level=[ lvl_name for lvl_name in usage_table.index.names if lvl_name.startswith("cluster")]) + # Levels already turned into columns (e.g. clusters) need no copy. + others = [other for other in others if other not in long_form.columns] + missing = set(others).difference(long_form.index.names) + if missing: + raise ValueError( + "All elements in *others* should be in the index " + f"of *usage_table* (not found: {missing})") + return long_form.assign(**{ + other: long_form.index.get_level_values(other) for other in others}) + + +def variable2order(variable): + """ + Define the order of amino-acids or codons. + """ + if variable == "aa": + return list(columns_by_aa.keys()) + if variable == "codon": + return [codon for (_, codon) in concat(columns_by_aa.values())] + raise ValueError("variable can only be 'aa' or 'codon'.\n") + + +def format_codon_labels(codons): + """ + Add amino-acid information to codons meant to be used as tick labels. + """ + return [ + plt.Text(x, y, f"{codon} ({codon2aa[codon]})") + for (x, y, codon) in map(attrgetter("_x", "_y", "_text"), codons)] def violin_usage(usage_table, variable, ylabel, hue="aa", axis=None): @@ -1115,7 +1148,7 @@ def violin_usage(usage_table, variable, ylabel, hue="aa", axis=None): *variable* should be either "codon" or "aa". 
""" - long_form = to_long_form(usage_table, ylabel) + long_form = to_long_form(usage_table, ylabel, others=[hue]) if axis is None: _, axis = plt.subplots(figsize=(18, 6)) do_legend = True @@ -1125,18 +1158,16 @@ def violin_usage(usage_table, variable, ylabel, hue="aa", axis=None): palette = aa2colour else: palette = None - if variable == "aa": - order = [aa for aa in columns_by_aa.keys()] - elif variable == "codon": - order = [codon for (_, codon) in concat(columns_by_aa.values())] - else: - raise ValueError(f"variable can only be 'aa' or 'codon'.\n") - sns.violinplot(x=variable, y=ylabel, order=order, + sns.violinplot(x=variable, y=ylabel, order=variable2order(variable), hue=hue, palette=palette, dodge=False, data=long_form, ax=axis, orient="v", scale="count") if do_legend: plt.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0) - axis.set_xticklabels(axis.get_xticklabels(), rotation=90) + if variable == "codon": + ticklabels = format_codon_labels(axis.get_xticklabels()) + else: + ticklabels = axis.get_xticklabels() + axis.set_xticklabels(ticklabels, rotation=90) return axis @@ -1146,23 +1177,22 @@ def violin_usage_vertical(usage_table, variable, ylabel, hue="aa", axis=None): *variable* should be either "codon" or "aa". 
""" - long_form = to_long_form(usage_table, ylabel) + long_form = to_long_form(usage_table, ylabel, others=[hue]) if axis is None: _, axis = plt.subplots(figsize=(6, 44)) if hue == "aa": palette = aa2colour else: palette = None - if variable == "aa": - order = [aa for aa in columns_by_aa.keys()] - elif variable == "codon": - order = [codon for (_, codon) in concat(columns_by_aa.values())] - else: - raise ValueError(f"variable can only be 'aa' or 'codon'.\n") sns.violinplot( - y=variable, x=ylabel, order=order, + y=variable, x=ylabel, order=variable2order(variable), hue=hue, palette=palette, dodge=False, data=long_form, ax=axis, orient="h", scale="count") + if variable == "codon": + ticklabels = format_codon_labels(axis.get_yticklabels()) + else: + ticklabels = axis.get_yticklabels() + axis.set_yticklabels(ticklabels) return axis