From d4c6916b9d3a2c2e384798e0f32ade51f2377d90 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li@normalesup.org>
Date: Wed, 9 Aug 2017 17:42:56 +0200
Subject: [PATCH] Moved stuff to common libraries, test size factor.

---
 libhts/__init__.py |  2 +-
 libhts/libhts.py   | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/libhts/__init__.py b/libhts/__init__.py
index 620b369..90bb273 100644
--- a/libhts/__init__.py
+++ b/libhts/__init__.py
@@ -1 +1 @@
-from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, status_setter
+from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, plot_counts_distribution, plot_norm_correlations, size_factor_correlations, status_setter
diff --git a/libhts/libhts.py b/libhts/libhts.py
index 767e520..d932935 100644
--- a/libhts/libhts.py
+++ b/libhts/libhts.py
@@ -13,6 +13,7 @@ import pandas as pd
 from scipy.stats.stats import pearsonr
 # To compute geometric mean
 from scipy.stats.mstats import gmean
+import seaborn as sns
 from rpy2.robjects import r, pandas2ri, Formula, StrVector
 as_df = r("as.data.frame")
 from rpy2.rinterface import RRuntimeError
@@ -115,6 +116,31 @@ def size_factor_correlations(counts_data, summaries, normalizer):
     return (counts_data / size_factors).apply(compute_pearsonr_with_size_factor, axis=1)
 
 
+def plot_norm_correlations(correlations):
+    """Draw a seaborn violin plot of *correlations*.
+
+    ``cut=0`` is passed to seaborn and the y axis is labelled as a Pearson
+    correlation coefficient; nothing is returned.
+    """
+    sns.violinplot(data=correlations, cut=0).set_ylabel("Pearson correlation coefficient")
+
+
+def plot_counts_distribution(data, xlabel):
+    """Plot a kernel density estimate of *data* with x axis label *xlabel*.
+
+    A ``ValueError`` from ``data.plot.kde()`` is re-raised after a warning
+    reporting the number of lines and columns of *data*.
+    """
+    # TODO: try to plot with semilog x axis
+    try:
+        ax = data.plot.kde()
+    except ValueError:
+        warnings.warn("There seems to be a problem with the data.\n"
+                      "The data matrix has %d lines and %d columns.\n" % (len(data), len(data.columns)))
+        raise
+    ax.set_xlabel(xlabel)
+
+
 def status_setter(lfc_cutoffs=None):
     if lfc_cutoffs is None:
         lfc_cutoffs = [0.5, 1, 2]
-- 
GitLab