From 598c606db435d6bc3c2de108d2d3da590580d1c4 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Thu, 17 Mar 2022 22:45:05 +0100
Subject: [PATCH] Function to write genes list from clusters.

---
 libcodonusage/__init__.py      |  1 +
 libcodonusage/libcodonusage.py | 45 ++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py
index 9e9c9d7..45910b8 100644
--- a/libcodonusage/__init__.py
+++ b/libcodonusage/__init__.py
@@ -24,4 +24,5 @@ from .libcodonusage import (
     violin_usage_vertical,
     violin_usage_by_clusters,
     violin_with_thresh,
+    write_cluster_lists,
     )
diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 72bd6d1..1889d81 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -719,6 +719,51 @@ def load_bias_table(table_path, nb_info_cols=9, nb_cluster_series=2):
         header=[0, 1])
 
 
+def star2stop(text):
+    """
+    Return *text* with every "*" replaced by "stop", for use in file paths.
+    """
+    return text.replace("*", "stop")
+
+
+def write_cluster_lists(
+        usage_table, aa, clusters_dir,
+        cluster_level_template,
+        y_label_template):
+    """Write one gene list file per *aa* cluster plus a violin plot under
+    *clusters_dir*, and return a markdown fragment linking to these files."""
+    md_report = f"* Clusters for {aa}:\n\n"
+    aa_dir = clusters_dir.joinpath(star2stop(aa))
+    aa_dir.mkdir(parents=True, exist_ok=True)
+    # key: cluster; value: relative path to a file containing old locus tags
+    #      for genes belonging to this cluster
+    # NOTE(review): groupby presumably is toolz-style (returns a dict); confirm
+    relpaths_to_cluster_lists = {}
+    for (cluster, gene_list) in groupby(itemgetter(0), zip(
+        usage_table.index.get_level_values(cluster_level_template.format(aa=aa)),
+        usage_table.index.get_level_values("old_locus_tag"))).items():
+        path_to_clusterfile = aa_dir.joinpath(star2stop(f"{cluster}.txt"))
+        with path_to_clusterfile.open("w") as clust_fh:
+            clust_fh.write("\n".join(map(itemgetter(1), gene_list)))
+        relpaths_to_cluster_lists[cluster] = str(path_to_clusterfile.relative_to('.'))
+    md_report += "\n\n".join([
+        f"    - [{cluster}]({relpath_to_list})"
+        for (cluster, relpath_to_list) in relpaths_to_cluster_lists.items()])
+    violin_usage_by_clusters(
+        usage_table,
+        aa,
+        y_label_template,
+        cluster_level_template=cluster_level_template,
+        vertical=True)
+    path_to_fig = aa_dir.joinpath(star2stop(
+        f"usage_biases_violin_plots_by_cluster_for_{aa}.png"))
+    plt.savefig(path_to_fig, metadata={'creationDate': None})
+    plt.close()
+    relpath_to_fig = str(path_to_fig.relative_to('.'))
+    md_report += f"\n\n    - [Violin plots for {aa} clusters]({relpath_to_fig})\n\n"
+    return md_report
+
+
 def boxplot_usage(usage_table, ylabel, whiskers="1.5 IQR"):
     """
     Plot a boxplot from pandas DataFrame *usage_table*.
-- 
GitLab