Skip to content
Snippets Groups Projects
Commit f630e4fc authored by Blaise Li's avatar Blaise Li
Browse files

It seems access to DESeq2 size factors changed.

parent c30f9b7a
No related branches found
No related tags found
No related merge requests found
......@@ -12,11 +12,9 @@ import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects import r, pandas2ri, Formula, StrVector
as_df = r("as.data.frame")
#from rpy2.rinterface import RRuntimeError
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter
deseq2 = importr("DESeq2")
#import gc
def do_deseq2(cond_names, conditions, counts_data,
......@@ -46,6 +44,9 @@ def do_deseq2(cond_names, conditions, counts_data,
"Expect failures while loading data in DESeq2.\n")
# http://stackoverflow.com/a/31206596/1878788
pandas2ri.activate() # makes some conversions automatic
## debug
py2rpy_ndarray_issue = """Conversion 'py2rpy' not defined for objects of type '<class 'numpy.ndarray'>'"""
##
# r_counts_data = pandas2ri.py2ri(counts_data)
# r_col_data = pandas2ri.py2ri(col_data)
# r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib"))
......@@ -60,7 +61,7 @@ def do_deseq2(cond_names, conditions, counts_data,
# countData=counts_data,
# colData=col_data,
# design=formula)
# except RRuntimeError as e:
# except RuntimeError as e:
# # TODO: remove this debugging thing, or use a unique path
# # and issue a warning that indicates the path to the debug file
# col_data.to_csv("/tmp/col_data_debug.txt", sep="\t")
......@@ -68,8 +69,6 @@ def do_deseq2(cond_names, conditions, counts_data,
# raise
try:
dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio")
#gc.collect()
#except RRuntimeError as e:
except RuntimeError as e:
if sum(counts_data.prod(axis=1)) == 0:
msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
......@@ -78,13 +77,10 @@ def do_deseq2(cond_names, conditions, counts_data,
warnings.warn(msg)
try:
dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts")
#gc.collect()
#except RRuntimeError as e:
except RuntimeError as e:
msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
"We give up."])
warnings.warn(msg)
#gc.collect()
raise
#print(counts_data.dtypes)
#print(counts_data.columns)
......@@ -93,44 +89,69 @@ def do_deseq2(cond_names, conditions, counts_data,
else:
raise
with localconverter(ro.default_converter + pandas2ri.converter):
size_factors = ro.conversion.rpy2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds)))
size_factors = deseq2.sizeFactors_DESeqDataSet(dds)
# d2_sfs = deseq2.sizeFactors_DESeqDataSet(dds)
# try:
# size_factors_rdf = as_df(d2_sfs)
# size_factors = ro.conversion.rpy2py(size_factors_rdf)
# except NotImplementedError as e:
# if str(e) == py2rpy_ndarray_issue:
# warnings.warn(str(e))
# warnings.warn("d2_sfs:\n")
# warnings.warn(str(d2_sfs))
# warnings.warn("\n")
# size_factors = d2_sfs
# else:
# raise
#try:
# size_factors = d2_sfs
#except NotImplementedError as e:
# if str(e) == py2rpy_ndarray_issue:
# warnings.warn(str(e))
# warnings.warn("size_factors_rdf:\n")
# warnings.warn(str(size_factors_rdf))
# warnings.warn("\n")
# raise
#for cond in cond_names:
# #s = size_factors.loc[cond][0]
# #(*_, s) = size_factors.loc[cond]
#pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',))
try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric")
#gc.collect()
#except RRuntimeError as e:
except RuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We will try with fitType=\"local\"."])
warnings.warn(msg)
try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local")
#gc.collect()
#except RRuntimeError as e:
except RuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We will try with fitType=\"mean\"."])
warnings.warn(msg)
try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean")
#gc.collect()
#except RRuntimeError as e:
except RuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We give up."])
warnings.warn(msg)
#gc.collect()
raise
dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"])
#gc.collect()
with localconverter(ro.default_converter + pandas2ri.converter):
res = ro.conversion.rpy2py(as_df(deseq2.results(
res_rdf = as_df(deseq2.results(
dds,
contrast=contrast,
addMLE=deseq2_args["addMLE"],
independentFiltering=deseq2_args["independentFiltering"])))
independentFiltering=deseq2_args["independentFiltering"]))
try:
res = ro.conversion.rpy2py(res_rdf)
except NotImplementedError as e:
if str(e) == py2rpy_ndarray_issue:
warnings.warn(str(e))
warnings.warn("res_rdf:\n")
warnings.warn(str(rdf))
warnings.warn("\n")
raise
res.index = counts_data.index
return res, {cond : size_factors.loc[cond][0] for cond in cond_names}
# return res, {cond : size_factors.loc[cond][0] for cond in cond_names}
# Hopefully the order in size_factors is the same as in cond_names
return res, dict(zip(cond_names, size_factors))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment