From 4d3532cce68351786cefc1bcc6d9367a96bf1192 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li@normalesup.org>
Date: Tue, 8 Aug 2017 17:32:14 +0200
Subject: [PATCH] Fix error in DE status determination.

---
 libhts/__init__.py |  2 +-
 libhts/libhts.py   | 24 ++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/libhts/__init__.py b/libhts/__init__.py
index 7090e6c..620b369 100644
--- a/libhts/__init__.py
+++ b/libhts/__init__.py
@@ -1 +1 @@
-from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter
+from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, status_setter
diff --git a/libhts/libhts.py b/libhts/libhts.py
index d100813..767e520 100644
--- a/libhts/libhts.py
+++ b/libhts/libhts.py
@@ -9,6 +9,8 @@ def formatwarning(message, category, filename, lineno, line):
 warnings.formatwarning = formatwarning
 
 import pandas as pd
+# To compute correlation coefficient
+from scipy.stats.stats import pearsonr
 # To compute geometric mean
 from scipy.stats.mstats import gmean
 from rpy2.robjects import r, pandas2ri, Formula, StrVector
@@ -97,6 +99,22 @@ def median_ratio_to_pseudo_ref_size_factors(counts_data):
     return counts_data[counts_data.prod(axis=1) > 0].apply(
         median_ratio_to_pseudo_ref, axis=0)
 
+
+def size_factor_correlations(counts_data, summaries, normalizer):
+    """Is there a correlation, across libraries, between normalized values and size factors?
+    The size factor type *normalizer* is either computed or taken from *summaries*.
+    The normalized data are computed by dividing *counts_data* by this size factor."""
+    if normalizer == "median_ratio_to_pseudo_ref":
+        size_factors = median_ratio_to_pseudo_ref_size_factors(counts_data)
+    else:
+        size_factors = summaries.loc[normalizer]
+    #by_norm = counts_data / size_factors
+    def compute_pearsonr_with_size_factor(row):
+        return pearsonr(row, size_factors)[0]
+    #return by_norm.apply(compute_pearsonr_with_size_factor, axis=1)
+    return (counts_data / size_factors).apply(compute_pearsonr_with_size_factor, axis=1)
+
+
 def status_setter(lfc_cutoffs=None):
     if lfc_cutoffs is None:
         lfc_cutoffs = [0.5, 1, 2]
@@ -107,13 +125,15 @@ def status_setter(lfc_cutoffs=None):
             #if row["log2FoldChange"] > 0:
             lfc = row["lfcMLE"]
             if lfc > 0:
+                # Start from the highest cutoff,
+                # and decrease until below lfc
                 for cutoff in sorted(lfc_cutoffs, reverse=True):
                     if lfc > cutoff:
                         return f"up{cutoff}"
                 return "up"
             else:
-                for cutoff in sorted(lfc_cutoffs):
-                    if lfc < cutoff:
+                for cutoff in sorted(lfc_cutoffs, reverse=True):
+                    if lfc < -cutoff:
                         return f"down{cutoff}"
                 return "down"
         else:
-- 
GitLab