def plot_norm_correlations(correlations):
    """Draw a violin plot of correlation coefficients.

    *correlations* is handed unchanged to ``seaborn.violinplot`` as its
    ``data`` argument (presumably the result of
    ``size_factor_correlations`` -- confirm against callers).

    Returns the Axes object produced by seaborn, with its y axis
    labelled "Pearson correlation coefficient".
    """
    # Imported here so the function carries its own plotting dependency.
    import seaborn as sns

    # cut=0 stops the density estimate at the extreme observed values
    # instead of extending the violins past them.
    ax = sns.violinplot(data=correlations, cut=0)
    ax.set_ylabel("Pearson correlation coefficient")
    return ax


def plot_counts_distribution(data, xlabel):
    """Plot kernel density estimates of *data* and label the x axis.

    *data* must provide ``.plot.kde()``, ``len()`` and ``.columns``
    (a pandas DataFrame does).

    Returns the object returned by ``data.plot.kde()`` after its x label
    has been set to *xlabel*.

    Raises ValueError when the density estimation fails; a warning
    reporting the dimensions of *data* is issued before the exception is
    propagated.
    """
    # Local import: the handler below must not depend on a module-level
    # import that may be missing, which would mask the real error.
    import warnings

    # TODO: try to plot with semilog x axis
    try:
        ax = data.plot.kde()
    except ValueError:
        # Report the matrix dimensions to help diagnose the failure,
        # then let the original error propagate.
        msg = (
            "There seems to be a problem with the data.\n"
            "The data matrix has %d lines and %d columns.\n"
            % (len(data), len(data.columns))
        )
        warnings.warn(msg)
        raise
    ax.set_xlabel(xlabel)
    return ax


def status_setter(lfc_cutoffs=None):
    if lfc_cutoffs is None:
        lfc_cutoffs = [0.5, 1, 2]