diff --git a/libdeseq/libdeseq.py b/libdeseq/libdeseq.py index 9e5f37e4f3062027b68d9599260743ab5972134d..e3a42d09b384b43447f35ba7aad5b82b610d705b 100644 --- a/libdeseq/libdeseq.py +++ b/libdeseq/libdeseq.py @@ -12,11 +12,9 @@ import pandas as pd import rpy2.robjects as ro from rpy2.robjects import r, pandas2ri, Formula, StrVector as_df = r("as.data.frame") -#from rpy2.rinterface import RRuntimeError from rpy2.robjects.packages import importr from rpy2.robjects.conversion import localconverter deseq2 = importr("DESeq2") -#import gc def do_deseq2(cond_names, conditions, counts_data, @@ -46,6 +44,9 @@ def do_deseq2(cond_names, conditions, counts_data, "Expect failures while loading data in DESeq2.\n") # http://stackoverflow.com/a/31206596/1878788 pandas2ri.activate() # makes some conversions automatic + ## debug + py2rpy_ndarray_issue = """Conversion 'py2rpy' not defined for objects of type '<class 'numpy.ndarray'>'""" + ## # r_counts_data = pandas2ri.py2ri(counts_data) # r_col_data = pandas2ri.py2ri(col_data) # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib")) @@ -60,7 +61,7 @@ def do_deseq2(cond_names, conditions, counts_data, # countData=counts_data, # colData=col_data, # design=formula) - # except RRuntimeError as e: + # except RuntimeError as e: # # TODO: remove this debugging thing, or use a unique path # # and issue a warning that indicates the path to the debug file # col_data.to_csv("/tmp/col_data_debug.txt", sep="\t") @@ -68,8 +69,6 @@ def do_deseq2(cond_names, conditions, counts_data, # raise try: dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio") - #gc.collect() - #except RRuntimeError as e: except RuntimeError as e: if sum(counts_data.prod(axis=1)) == 0: msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, @@ -78,13 +77,10 @@ def do_deseq2(cond_names, conditions, counts_data, warnings.warn(msg) try: dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts") - #gc.collect() - #except RRuntimeError as e: except RuntimeError as e: msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, "We give up."]) warnings.warn(msg) - #gc.collect() raise #print(counts_data.dtypes) #print(counts_data.columns) @@ -93,44 +89,69 @@ def do_deseq2(cond_names, conditions, counts_data, else: raise with localconverter(ro.default_converter + pandas2ri.converter): - size_factors = ro.conversion.rpy2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds))) + size_factors = deseq2.sizeFactors_DESeqDataSet(dds) + # d2_sfs = deseq2.sizeFactors_DESeqDataSet(dds) + # try: + # size_factors_rdf = as_df(d2_sfs) + # size_factors = ro.conversion.rpy2py(size_factors_rdf) + # except NotImplementedError as e: + # if str(e) == py2rpy_ndarray_issue: + # warnings.warn(str(e)) + # warnings.warn("d2_sfs:\n") + # warnings.warn(str(d2_sfs)) + # warnings.warn("\n") + # size_factors = d2_sfs + # else: + # raise + #try: + # size_factors = d2_sfs + #except NotImplementedError as e: + # if str(e) == py2rpy_ndarray_issue: + # warnings.warn(str(e)) + # warnings.warn("size_factors_rdf:\n") + # warnings.warn(str(size_factors_rdf)) + # warnings.warn("\n") + # raise #for cond in cond_names: # #s = size_factors.loc[cond][0] # #(*_, s) = size_factors.loc[cond] #pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',)) try: dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric") - #gc.collect() - #except RRuntimeError as e: except RuntimeError as e: msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, "We will try with fitType=\"local\"."]) warnings.warn(msg) try: dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local") - #gc.collect() - #except RRuntimeError as e: except RuntimeError as e: msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, "We will try with fitType=\"mean\"."]) warnings.warn(msg) try: dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean") - #gc.collect() - #except RRuntimeError as e: except RuntimeError as e: msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, "We give up."]) warnings.warn(msg) - #gc.collect() raise dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"]) - #gc.collect() with localconverter(ro.default_converter + pandas2ri.converter): - res = ro.conversion.rpy2py(as_df(deseq2.results( + res_rdf = as_df(deseq2.results( dds, contrast=contrast, addMLE=deseq2_args["addMLE"], - independentFiltering=deseq2_args["independentFiltering"]))) + independentFiltering=deseq2_args["independentFiltering"])) + try: + res = ro.conversion.rpy2py(res_rdf) + except NotImplementedError as e: + if str(e) == py2rpy_ndarray_issue: + warnings.warn(str(e)) + warnings.warn("res_rdf:\n") + warnings.warn(str(rdf)) + warnings.warn("\n") + raise res.index = counts_data.index - return res, {cond : size_factors.loc[cond][0] for cond in cond_names} + # return res, {cond : size_factors.loc[cond][0] for cond in cond_names} + # Hopefully the order in size_factors is the same as in cond_names + return res, dict(zip(cond_names, size_factors))