Skip to content
Snippets Groups Projects
Commit 4d3532cc authored by Blaise Li's avatar Blaise Li
Browse files

Fix error in DE status determination.

parent 93f8a8e1
No related branches found
No related tags found
No related merge requests found
from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter
from .libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, status_setter
......@@ -9,6 +9,8 @@ def formatwarning(message, category, filename, lineno, line):
warnings.formatwarning = formatwarning
import pandas as pd
# To compute correlation coefficient
from scipy.stats.stats import pearsonr
# To compute geometric mean
from scipy.stats.mstats import gmean
from rpy2.robjects import r, pandas2ri, Formula, StrVector
......@@ -97,6 +99,22 @@ def median_ratio_to_pseudo_ref_size_factors(counts_data):
return counts_data[counts_data.prod(axis=1) > 0].apply(
median_ratio_to_pseudo_ref, axis=0)
def size_factor_correlations(counts_data, summaries, normalizer):
"""Is there a correlation, across libraries, between normalized values and size factors?
The size factor type *normalizer* is either computed or taken from *summaries*.
The normalized data are computed by dividing *counts_data* by this size factor."""
if normalizer == "median_ratio_to_pseudo_ref":
size_factors = median_ratio_to_pseudo_ref_size_factors(counts_data)
else:
size_factors = summaries.loc[normalizer]
#by_norm = counts_data / size_factors
def compute_pearsonr_with_size_factor(row):
return pearsonr(row, size_factors)[0]
#return by_norm.apply(compute_pearsonr_with_size_factor, axis=1)
return (counts_data / size_factors).apply(compute_pearsonr_with_size_factor, axis=1)
def status_setter(lfc_cutoffs=None):
if lfc_cutoffs is None:
lfc_cutoffs = [0.5, 1, 2]
......@@ -107,13 +125,15 @@ def status_setter(lfc_cutoffs=None):
#if row["log2FoldChange"] > 0:
lfc = row["lfcMLE"]
if lfc > 0:
# Start from the highest cutoff,
# and decrease until below lfc
for cutoff in sorted(lfc_cutoffs, reverse=True):
if lfc > cutoff:
return f"up{cutoff}"
return "up"
else:
for cutoff in sorted(lfc_cutoffs):
if lfc < cutoff:
for cutoff in sorted(lfc_cutoffs, reverse=True):
if lfc < -cutoff:
return f"down{cutoff}"
return "down"
else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment