From 8ca17aef73cc8d8948a4a59b35fbcbcb545a2407 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Tue, 12 Apr 2022 15:42:24 +0200 Subject: [PATCH] Figure size tweaks, violins by chromosome. --- libcodonusage/__init__.py | 1 + libcodonusage/libcodonusage.py | 81 +++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index b2de778..3c99cc9 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -30,6 +30,7 @@ from .libcodonusage import ( violin_usage, violin_usage_vertical, violin_usage_by_clusters, + violin_usage_by_clusters_splitby, violin_with_thresh, write_cluster_lists, ) diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index a4e56a0..ad02e60 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -1154,9 +1154,17 @@ def violin_usage( *variable* should be either "codon" or "aa". """ + if hue in {"aa", "codon"}: + dodge = False + nb_violins = 1 + else: + dodge = True + # dodge = True implies there will be one violin + # per possible value of *hue*, side by side + nb_violins = len(usage_table.index.get_level_values(hue).unique()) long_form = to_long_form(usage_table, ylabel, others=[hue]) if axis is None: - _, axis = plt.subplots(figsize=(18, 6)) + _, axis = plt.subplots(figsize=(18 * nb_violins, 6)) do_legend = True else: do_legend = False @@ -1164,10 +1172,6 @@ def violin_usage( palette = aa2colour else: palette = None - if hue in {"aa", "codon"}: - dodge = False - else: - dodge = True kwargs = { "x": variable, "y": ylabel, "order": variable2order(variable), "hue": hue, "palette": palette, "dodge": dodge, @@ -1196,17 +1200,21 @@ def violin_usage_vertical( *variable* should be either "codon" or "aa". """ + if hue in {"aa", "codon"}: + dodge = False + nb_violins = 1 + else: + dodge = True + # dodge = True implies there will be one violin + # per possible value of *hue*, side by side + nb_violins = len(usage_table.index.get_level_values(hue).unique()) long_form = to_long_form(usage_table, ylabel, others=[hue]) if axis is None: - _, axis = plt.subplots(figsize=(6, 44)) + _, axis = plt.subplots(figsize=(6, 44 * nb_violins)) if hue == "aa": palette = aa2colour else: palette = None - if hue in {"aa", "codon"}: - dodge = False - else: - dodge = True kwargs = { "y": variable, "x": ylabel, "order": variable2order(variable), @@ -1235,7 +1243,7 @@ def violin_usage_by_clusters(usage_with_clusters, aa, # pylint: disable=C0103 Plot a series of violin plots for each cluster of genes. The clusters are defined in usage table *usage_with_clusters* - based on usage biases for the codons codong for amino-acid *aa*. + based on usage biases for the codons coding for amino-acid *aa*. *usage_with_clusters* should have clustering indications as an index level named according to *cluster_level_template* @@ -1243,14 +1251,25 @@ def violin_usage_by_clusters(usage_with_clusters, aa, # pylint: disable=C0103 """ clusters = usage_with_clusters.groupby( level=cluster_level_template.format(aa=aa)) + # To adjust figure size + if "hue" in violin_kwargs and violin_kwargs["hue"] not in {"aa", "codon"}: + # dodge will be set to True in later plotting call. + # This implies that there will be one violin + # per possible value of *hue*, side by side + nb_violins = len(usage_with_clusters.index.get_level_values( + violin_kwargs["hue"]).unique()) + else: + nb_violins = 1 if vertical: - _, axes = plt.subplots( + fig, axes = plt.subplots( ncols=clusters.ngroups, - figsize=(6 * clusters.ngroups, 44)) + constrained_layout=True, + figsize=(6 * clusters.ngroups, 44 * nb_violins)) else: - _, axes = plt.subplots( + fig, axes = plt.subplots( nrows=clusters.ngroups, - figsize=(18, 6 * clusters.ngroups)) + constrained_layout=True, + figsize=(18 * nb_violins, 6 * clusters.ngroups)) for ((cluster, usage_table), axis) in zip(clusters, axes): kwargs = {"axis": axis} kwargs.update(violin_kwargs) @@ -1264,6 +1283,38 @@ def violin_usage_by_clusters(usage_with_clusters, aa, # pylint: disable=C0103 usage_table, "codon", ylabel_template.format(aa=aa, cluster=cluster), **kwargs) + return fig + + +def violin_usage_by_clusters_splitby( + usage_with_clusters, aa, # pylint: disable=C0103 + ylabel_template, + cluster_level_template="cluster_{aa}", + vertical=False, by_lvl="chrom", + **violin_kwargs): + """ + Plot a series of violin plots for each cluster of genes, + splitting the figures according to groups defined by the + content of the index level *by_lvl* of *usage_with_clusters*. + + The clusters are defined in usage table *usage_with_clusters* + based on usage biases for the codons coding for amino-acid *aa*. + + *usage_with_clusters* should have clustering indications as + an index level named according to *cluster_level_template* + (default "cluster_{aa}"), where {aa} is to be replaced by *aa*. + """ + classes = sorted(set( + usage_with_clusters.index.get_level_values(by_lvl))) + idx_level = usage_with_clusters.index.names.index(by_lvl) + for clss in classes: + fig = violin_usage_by_clusters( + usage_with_clusters.loc[ + (*(slice(None) for _ in range(idx_level)), clss),], + aa, ylabel_template + f" for {clss}", + cluster_level_template=cluster_level_template, + vertical=vertical, **violin_kwargs) + fig.suptitle(f"Violin plots for {clss}") # Based on -- GitLab