diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py index 9e9c9d75c6b6ce0c6d409ae5f9b69016f2c86ea0..45910b8f337c1890f6ef98a6c1e752747cac8cf0 100644 --- a/libcodonusage/__init__.py +++ b/libcodonusage/__init__.py @@ -24,4 +24,5 @@ from .libcodonusage import ( violin_usage_vertical, violin_usage_by_clusters, violin_with_thresh, + write_cluster_lists, ) diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 72bd6d15c4cb0a79d572f314a39b77063b7338ba..1889d810a7ebfff493446c0750afd5c9cb1969a5 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -719,6 +719,51 @@ def load_bias_table(table_path, nb_info_cols=9, nb_cluster_series=2): header=[0, 1]) +def star2stop(text): + """ + Replace stars with "stop", for use in file paths. + """ + return text.replace("*", "stop") + + +def write_cluster_lists( + usage_table, aa, clusters_dir, + cluster_level_template, + y_label_template): + """ + """ + md_report = f"* Clusters for {aa}:\n\n" + aa_dir = clusters_dir.joinpath(star2stop(aa)) + aa_dir.mkdir(parents=True, exist_ok=True) + # key: cluster + # value: relative path to a file containing old locus tags + # for genes belonging to this cluster + relpaths_to_cluster_lists = {} + for (cluster, gene_list) in groupby(itemgetter(0), zip( + usage_table.index.get_level_values(cluster_level_template.format(aa=aa)), + usage_table.index.get_level_values("old_locus_tag"))).items(): + path_to_clusterfile = aa_dir.joinpath(star2stop(f"{cluster}.txt")) + with path_to_clusterfile.open("w") as clust_fh: + clust_fh.write("\n".join(map(itemgetter(1), gene_list))) + relpaths_to_cluster_lists[cluster] = str(path_to_clusterfile.relative_to('.')) + md_report += "\n\n".join([ + f" - [{cluster}]({relpath_to_list})" + for (cluster, relpath_to_list) in relpaths_to_cluster_lists.items()]) + violin_usage_by_clusters( + usage_table, + aa, + y_label_template, + cluster_level_template=cluster_level_template, + vertical=True) + path_to_fig = aa_dir.joinpath(star2stop( + f"usage_biases_violin_plots_by_cluster_for_{aa}.png")) + plt.savefig(path_to_fig, metadata={'creationDate': None}) + plt.close() + relpath_to_fig = str(path_to_fig.relative_to('.')) + md_report += f"\n\n - [Violin plots for {aa} clusters]({relpath_to_fig})\n\n" + return md_report + + def boxplot_usage(usage_table, ylabel, whiskers="1.5 IQR"): """ Plot a boxplot from pandas DataFrame *usage_table*.