Commit fd4e0679 authored by Blaise Li's avatar Blaise Li
Browse files

Moved DESEq2 calling function in its own module.

parent 2a4d2234
# Compiled python modules.
*.pyc
# Setuptools distribution folder.
/dist/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
# Backups
*~
from .libhts import ( from .libhts import (
do_deseq2, gtf_2_genes_exon_lengths, gtf_2_genes_exon_lengths,
id_list_gtf2bed, id_list_gtf2bed,
make_empty_bigwig, make_empty_bigwig,
make_seeding_function, make_seeding_function,
......
...@@ -33,11 +33,11 @@ TEX_PARAMS = { ...@@ -33,11 +33,11 @@ TEX_PARAMS = {
} }
mpl.rcParams.update(TEX_PARAMS) mpl.rcParams.update(TEX_PARAMS)
import seaborn as sns import seaborn as sns
from rpy2.robjects import r, pandas2ri, Formula, StrVector # from rpy2.robjects import r, pandas2ri, Formula, StrVector
as_df = r("as.data.frame") # as_df = r("as.data.frame")
from rpy2.rinterface import RRuntimeError # from rpy2.rinterface import RRuntimeError
from rpy2.robjects.packages import importr # from rpy2.robjects.packages import importr
deseq2 = importr("DESeq2") # deseq2 = importr("DESeq2")
from pybedtools import BedTool from pybedtools import BedTool
import pyBigWig import pyBigWig
import networkx as nx import networkx as nx
...@@ -410,88 +410,88 @@ def plot_boxplots(data, ylabel): ...@@ -410,88 +410,88 @@ def plot_boxplots(data, ylabel):
############ ############
# DE stuff # # DE stuff #
############ ############
def do_deseq2(cond_names, conditions, counts_data, # def do_deseq2(cond_names, conditions, counts_data,
formula=None, contrast=None, deseq2_args=None): # formula=None, contrast=None, deseq2_args=None):
"""Runs a DESeq2 differential expression analysis.""" # """Runs a DESeq2 differential expression analysis."""
if formula is None: # if formula is None:
formula = Formula("~ lib") # formula = Formula("~ lib")
if contrast is None: # if contrast is None:
# FIXME: MUT and REF are not defined # # FIXME: MUT and REF are not defined
# Maybe just make (formula and) contrast mandatory # # Maybe just make (formula and) contrast mandatory
contrast = StrVector(["lib", MUT, REF]) # contrast = StrVector(["lib", MUT, REF])
if deseq2_args is None: # if deseq2_args is None:
deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True} # deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True}
col_data = pd.DataFrame(conditions).assign( # col_data = pd.DataFrame(conditions).assign(
cond_name=pd.Series(cond_names).values).set_index("cond_name") # cond_name=pd.Series(cond_names).values).set_index("cond_name")
# In case we want contrasts between factor combinations # # In case we want contrasts between factor combinations
if ("lib" in col_data.columns) and ("treat" in col_data.columns): # if ("lib" in col_data.columns) and ("treat" in col_data.columns):
col_data = col_data.assign( # col_data = col_data.assign(
lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip( # lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip(
col_data["lib"], col_data["treat"])]) # col_data["lib"], col_data["treat"])])
# http://stackoverflow.com/a/31206596/1878788 # # http://stackoverflow.com/a/31206596/1878788
pandas2ri.activate() # makes some conversions automatic # pandas2ri.activate() # makes some conversions automatic
# r_counts_data = pandas2ri.py2ri(counts_data) # # r_counts_data = pandas2ri.py2ri(counts_data)
# r_col_data = pandas2ri.py2ri(col_data) # # r_col_data = pandas2ri.py2ri(col_data)
# r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib")) # # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib"))
dds = deseq2.DESeqDataSetFromMatrix( # dds = deseq2.DESeqDataSetFromMatrix(
countData=counts_data, # countData=counts_data,
colData=col_data, # colData=col_data,
design=formula) # design=formula)
# dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"]) # # dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"])
# Decompose into the 3 steps to have more control on the options # # Decompose into the 3 steps to have more control on the options
try: # try:
dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio") # dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio")
except RRuntimeError as e: # except RRuntimeError as e:
if sum(counts_data.prod(axis=1)) == 0: # if sum(counts_data.prod(axis=1)) == 0:
msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, # msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
"This is probably because every gene has at least one zero.\n", # "This is probably because every gene has at least one zero.\n",
"We will try to use the \"poscounts\" method instead."]) # "We will try to use the \"poscounts\" method instead."])
warnings.warn(msg) # warnings.warn(msg)
try: # try:
dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts") # dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts")
except RRuntimeError as e: # except RRuntimeError as e:
msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, # msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
"We give up."]) # "We give up."])
warnings.warn(msg) # warnings.warn(msg)
raise # raise
#print(counts_data.dtypes) # #print(counts_data.dtypes)
#print(counts_data.columns) # #print(counts_data.columns)
#print(len(counts_data)) # #print(len(counts_data))
#raise # #raise
else: # else:
raise # raise
size_factors = pandas2ri.ri2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds))) # size_factors = pandas2ri.ri2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds)))
#for cond in cond_names: # #for cond in cond_names:
# #s = size_factors.loc[cond][0] # # #s = size_factors.loc[cond][0]
# #(*_, s) = size_factors.loc[cond] # # #(*_, s) = size_factors.loc[cond]
#pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',)) # #pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',))
try: # try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric") # dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric")
except RRuntimeError as e: # except RRuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, # msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We will try with fitType=\"local\"."]) # "We will try with fitType=\"local\"."])
warnings.warn(msg) # warnings.warn(msg)
try: # try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local") # dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local")
except RRuntimeError as e: # except RRuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, # msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We will try with fitType=\"mean\"."]) # "We will try with fitType=\"mean\"."])
warnings.warn(msg) # warnings.warn(msg)
try: # try:
dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean") # dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean")
except RRuntimeError as e: # except RRuntimeError as e:
msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, # msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
"We give up."]) # "We give up."])
warnings.warn(msg) # warnings.warn(msg)
raise # raise
dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"]) # dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"])
res = pandas2ri.ri2py(as_df(deseq2.results( # res = pandas2ri.ri2py(as_df(deseq2.results(
dds, # dds,
contrast=contrast, # contrast=contrast,
addMLE=deseq2_args["addMLE"], # addMLE=deseq2_args["addMLE"],
independentFiltering=deseq2_args["independentFiltering"]))) # independentFiltering=deseq2_args["independentFiltering"])))
res.index = counts_data.index # res.index = counts_data.index
return res, {cond : size_factors.loc[cond][0] for cond in cond_names} # return res, {cond : size_factors.loc[cond][0] for cond in cond_names}
# Cutoffs in log fold change # Cutoffs in log fold change
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment