From 4d3532cce68351786cefc1bcc6d9367a96bf1192 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li@normalesup.org> Date: Tue, 8 Aug 2017 17:32:14 +0200 Subject: [PATCH] Fix error in DE status determination. --- libhts/__init__.py | 2 +- libhts/libhts.py | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/libhts/__init__.py b/libhts/__init__.py index 7090e6c..620b369 100644 --- a/libhts/__init__.py +++ b/libhts/__init__.py @@ -1 +1 @@ -from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter +from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, status_setter diff --git a/libhts/libhts.py b/libhts/libhts.py index d100813..767e520 100644 --- a/libhts/libhts.py +++ b/libhts/libhts.py @@ -9,6 +9,8 @@ def formatwarning(message, category, filename, lineno, line): warnings.formatwarning = formatwarning import pandas as pd +# To compute correlation coefficient +from scipy.stats.stats import pearsonr # To compute geometric mean from scipy.stats.mstats import gmean from rpy2.robjects import r, pandas2ri, Formula, StrVector @@ -97,6 +99,22 @@ def median_ratio_to_pseudo_ref_size_factors(counts_data): return counts_data[counts_data.prod(axis=1) > 0].apply( median_ratio_to_pseudo_ref, axis=0) + +def size_factor_correlations(counts_data, summaries, normalizer): + """Is there a correlation, across libraries, between normalized values and size factors? + The size factor type *normalizer* is either computed or taken from *summaries*. + The normalized data are computed by dividing *counts_data* by this size factor.""" + if normalizer == "median_ratio_to_pseudo_ref": + size_factors = median_ratio_to_pseudo_ref_size_factors(counts_data) + else: + size_factors = summaries.loc[normalizer] + #by_norm = counts_data / size_factors + def compute_pearsonr_with_size_factor(row): + return pearsonr(row, size_factors)[0] + #return by_norm.apply(compute_pearsonr_with_size_factor, axis=1) + return (counts_data / size_factors).apply(compute_pearsonr_with_size_factor, axis=1) + + def status_setter(lfc_cutoffs=None): if lfc_cutoffs is None: lfc_cutoffs = [0.5, 1, 2] @@ -107,13 +125,15 @@ def status_setter(lfc_cutoffs=None): #if row["log2FoldChange"] > 0: lfc = row["lfcMLE"] if lfc > 0: + # Start from the highest cutoff, + # and decrease until below lfc for cutoff in sorted(lfc_cutoffs, reverse=True): if lfc > cutoff: return f"up{cutoff}" return "up" else: - for cutoff in sorted(lfc_cutoffs): - if lfc < cutoff: + for cutoff in sorted(lfc_cutoffs, reverse=True): + if lfc < -cutoff: return f"down{cutoff}" return "down" else: -- GitLab