diff --git a/.gitmodules b/.gitmodules index 6430ed0ce6bb993f1468d6a2df2aae8a18627167..6b2a21203e297230ba642870d8753d2cc944c759 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "libhts"] path = libhts url = git@gitlab.pasteur.fr:bli/libhts.git +[submodule "libdeseq"] + path = libdeseq + url = git@gitlab.pasteur.fr:bli/libdeseq.git diff --git a/libdeseq b/libdeseq new file mode 160000 index 0000000000000000000000000000000000000000..8df8009e005d16234b2b985f63e83ed25d0fa759 --- /dev/null +++ b/libdeseq @@ -0,0 +1 @@ +Subproject commit 8df8009e005d16234b2b985f63e83ed25d0fa759 diff --git a/libdeseq/.gitignore b/libdeseq/.gitignore deleted file mode 100644 index b4e1667d2aef604250dc2a69f5edb9c94db2a90a..0000000000000000000000000000000000000000 --- a/libdeseq/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -# Compiled python modules. -*.pyc - -# Setuptools distribution folder. -/dist/ - -# Python egg metadata, regenerated from source files by setuptools. -/*.egg-info - -# Backups -*~ diff --git a/libdeseq/install.sh b/libdeseq/install.sh deleted file mode 100755 index 5ddc41a80e7fe6839786c8be3efa902015bc1b2a..0000000000000000000000000000000000000000 --- a/libdeseq/install.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -python3.6 setup.py build_ext -# .egg-link does not work with PYTHONPATH ? -python3.6 -m pip install -e . -python3.6 -m pip install --no-deps --ignore-installed . diff --git a/libdeseq/libdeseq/__init__.py b/libdeseq/libdeseq/__init__.py deleted file mode 100644 index f8c1cb17ff65ac40d92716a4fbe0cfb35f4702e3..0000000000000000000000000000000000000000 --- a/libdeseq/libdeseq/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .libdeseq import ( - do_deseq2) diff --git a/libdeseq/libdeseq/libdeseq.py b/libdeseq/libdeseq/libdeseq.py deleted file mode 100644 index 4d4dd801857e74210238a04b7012a90aece3d93e..0000000000000000000000000000000000000000 --- a/libdeseq/libdeseq/libdeseq.py +++ /dev/null @@ -1,127 +0,0 @@ -import warnings - - -def formatwarning(message, category, filename, lineno, line): - """Used to format warning messages.""" - return "%s:%s: %s: %s\n" % (filename, lineno, category.__name__, message) - - -warnings.formatwarning = formatwarning - -import pandas as pd -from rpy2.robjects import r, pandas2ri, Formula, StrVector -as_df = r("as.data.frame") -from rpy2.rinterface import RRuntimeError -from rpy2.robjects.packages import importr -deseq2 = importr("DESeq2") -#import gc - - -def do_deseq2(cond_names, conditions, counts_data, - formula=None, contrast=None, deseq2_args=None): - """Runs a DESeq2 differential expression analysis.""" - if formula is None: - formula = Formula("~ lib") - if contrast is None: - # FIXME: MUT and REF are not defined - # Maybe just make (formula and) contrast mandatory - contrast = StrVector(["lib", MUT, REF]) - if deseq2_args is None: - deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True} - col_data = pd.DataFrame(conditions).assign( - cond_name=pd.Series(cond_names).values).set_index("cond_name") - # In case we want contrasts between factor combinations - if ("lib" in col_data.columns) and ("treat" in col_data.columns): - col_data = col_data.assign( - lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip( - col_data["lib"], col_data["treat"])]) - col_data_rownames = list(col_data.index) - counts_data_colnames = list(counts_data.columns) - if col_data_rownames != counts_data_colnames: - warnings.warn( - "The lines in the sample description do not match " - "the columns in the counts table.\n" - "Expect failures while loading data in DESeq2.\n") - # http://stackoverflow.com/a/31206596/1878788 - pandas2ri.activate() # makes some conversions automatic - # r_counts_data = pandas2ri.py2ri(counts_data) - # r_col_data = pandas2ri.py2ri(col_data) - # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib")) - # dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"]) - # Decompose into the 3 steps to have more control on the options - dds = deseq2.DESeqDataSetFromMatrix( - countData=counts_data, - colData=col_data, - design=formula) - # try: - # dds = deseq2.DESeqDataSetFromMatrix( - # countData=counts_data, - # colData=col_data, - # design=formula) - # except RRuntimeError as e: - # # TODO: remove this debugging thing, or use a unique path - # # and issue a warning that indicates the path to the debug file - # col_data.to_csv("/tmp/col_data_debug.txt", sep="\t") - # counts_data.to_csv("/tmp/counts_data_debug.txt", sep="\t") - # raise - try: - dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio") - #gc.collect() - except RRuntimeError as e: - if sum(counts_data.prod(axis=1)) == 0: - msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, - "This is probably because every gene has at least one zero.\n", - "We will try to use the \"poscounts\" method instead."]) - warnings.warn(msg) - try: - dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts") - #gc.collect() - except RRuntimeError as e: - msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e, - "We give up."]) - warnings.warn(msg) - #gc.collect() - raise - #print(counts_data.dtypes) - #print(counts_data.columns) - #print(len(counts_data)) - #raise - else: - raise - size_factors = pandas2ri.ri2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds))) - #for cond in cond_names: - # #s = size_factors.loc[cond][0] - # #(*_, s) = size_factors.loc[cond] - #pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',)) - try: - dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric") - #gc.collect() - except RRuntimeError as e: - msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, - "We will try with fitType=\"local\"."]) - warnings.warn(msg) - try: - dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local") - #gc.collect() - except RRuntimeError as e: - msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, - "We will try with fitType=\"mean\"."]) - warnings.warn(msg) - try: - dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean") - #gc.collect() - except RRuntimeError as e: - msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e, - "We give up."]) - warnings.warn(msg) - #gc.collect() - raise - dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"]) - #gc.collect() - res = pandas2ri.ri2py(as_df(deseq2.results( - dds, - contrast=contrast, - addMLE=deseq2_args["addMLE"], - independentFiltering=deseq2_args["independentFiltering"]))) - res.index = counts_data.index - return res, {cond : size_factors.loc[cond][0] for cond in cond_names} diff --git a/libdeseq/setup.py b/libdeseq/setup.py deleted file mode 100644 index 82347e6c24357becd86af345a7e5d3c548af8c85..0000000000000000000000000000000000000000 --- a/libdeseq/setup.py +++ /dev/null @@ -1,23 +0,0 @@ -from setuptools import setup, find_packages -#from Cython.Build import cythonize - -name = "libdeseq" - -# Adapted from Biopython -__version__ = "Undefined" -for line in open("%s/__init__.py" % name): - if (line.startswith('__version__')): - exec(line.strip()) - - -setup( - name=name, - version=__version__, - description="Interfacing the call to DESEq2 with python.", - author="Blaise Li", - author_email="blaise.li@normalesup.org", - license="MIT", - packages=find_packages()) - #ext_modules = cythonize("libsmallrna/libsmallrna.pyx"), - #install_requires=["cytoolz"], - #zip_safe=False