Made libdeseq a submodule.

fdce587a · Blaise Li · 08452c07 · fdce587a · 8df8009e · 08452c07
Commit fdce587a authored 6 years ago by Blaise Li
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "libhts"]
 	path = libhts
 	url = git@gitlab.pasteur.fr:bli/libhts.git
+[submodule "libdeseq"]
+	path = libdeseq
+	url = git@gitlab.pasteur.fr:bli/libdeseq.git
--- a/libdeseq @ 8df8009e
+++ b/libdeseq @ 8df8009e
+Subproject commit 8df8009e005d16234b2b985f63e83ed25d0fa759
--- a/libdeseq/.gitignore
+++ b/libdeseq/.gitignore
-# Compiled python modules.
-*.pyc
-
-# Setuptools distribution folder.
-/dist/
-
-# Python egg metadata, regenerated from source files by setuptools.
-/*.egg-info
-
-# Backups
-*~
--- a/libdeseq/install.sh
+++ b/libdeseq/install.sh
-#!/bin/sh
-python3.6 setup.py build_ext
-# .egg-link does not work with PYTHONPATH ?
-python3.6 -m pip install -e .
-python3.6 -m pip install  --no-deps --ignore-installed .
--- a/libdeseq/libdeseq/__init__.py
+++ b/libdeseq/libdeseq/__init__.py
-from .libdeseq import (
-    do_deseq2)
--- a/libdeseq/libdeseq/libdeseq.py
+++ b/libdeseq/libdeseq/libdeseq.py
-import warnings
-
-
-def formatwarning(message, category, filename, lineno, line):
-    """Used to format warning messages."""
-    return "%s:%s: %s: %s\n" % (filename, lineno, category.__name__, message)
-
-
-warnings.formatwarning = formatwarning
-
-import pandas as pd
-from rpy2.robjects import r, pandas2ri, Formula, StrVector
-as_df = r("as.data.frame")
-from rpy2.rinterface import RRuntimeError
-from rpy2.robjects.packages import importr
-deseq2 = importr("DESeq2")
-#import gc
-
-
-def do_deseq2(cond_names, conditions, counts_data,
-              formula=None, contrast=None, deseq2_args=None):
-    """Runs a DESeq2 differential expression analysis."""
-    if formula is None:
-        formula = Formula("~ lib")
-    if contrast is None:
-        # FIXME: MUT and REF are not defined
-        # Maybe just make (formula and) contrast mandatory
-        contrast = StrVector(["lib", MUT, REF])
-    if deseq2_args is None:
-        deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True}
-    col_data = pd.DataFrame(conditions).assign(
-        cond_name=pd.Series(cond_names).values).set_index("cond_name")
-    # In case we want contrasts between factor combinations
-    if ("lib" in col_data.columns) and ("treat" in col_data.columns):
-        col_data = col_data.assign(
-            lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip(
-                col_data["lib"], col_data["treat"])])
-    col_data_rownames = list(col_data.index)
-    counts_data_colnames = list(counts_data.columns)
-    if col_data_rownames != counts_data_colnames:
-        warnings.warn(
-            "The lines in the sample description do not match "
-            "the columns in the counts table.\n"
-            "Expect failures while loading data in DESeq2.\n")
-    # http://stackoverflow.com/a/31206596/1878788
-    pandas2ri.activate()  # makes some conversions automatic
-    # r_counts_data = pandas2ri.py2ri(counts_data)
-    # r_col_data = pandas2ri.py2ri(col_data)
-    # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib"))
-    # dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"])
-    # Decompose into the 3 steps to have more control on the options
-    dds = deseq2.DESeqDataSetFromMatrix(
-        countData=counts_data,
-        colData=col_data,
-        design=formula)
-    # try:
-    #     dds = deseq2.DESeqDataSetFromMatrix(
-    #         countData=counts_data,
-    #         colData=col_data,
-    #         design=formula)
-    # except RRuntimeError as e:
-    #     # TODO: remove this debugging thing, or use a unique path
-    #     # and issue a warning that indicates the path to the debug file
-    #     col_data.to_csv("/tmp/col_data_debug.txt", sep="\t")
-    #     counts_data.to_csv("/tmp/counts_data_debug.txt", sep="\t")
-    #     raise
-    try:
-        dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio")
-        #gc.collect()
-    except RRuntimeError as e:
-        if sum(counts_data.prod(axis=1)) == 0:
-            msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
-                           "This is probably because every gene has at least one zero.\n",
-                           "We will try to use the \"poscounts\" method instead."])
-            warnings.warn(msg)
-            try:
-                dds = deseq2.estimateSizeFactors_DESeqDataSet(dds, type="poscounts")
-                #gc.collect()
-            except RRuntimeError as e:
-                msg = "".join(["Error occurred in estimateSizeFactors:\n%s\n" % e,
-                               "We give up."])
-                warnings.warn(msg)
-                #gc.collect()
-                raise
-                #print(counts_data.dtypes)
-                #print(counts_data.columns)
-                #print(len(counts_data))
-                #raise
-        else:
-            raise
-    size_factors = pandas2ri.ri2py(as_df(deseq2.sizeFactors_DESeqDataSet(dds)))
-    #for cond in cond_names:
-    #    #s = size_factors.loc[cond][0]
-    #    #(*_, s) = size_factors.loc[cond]
-    #pd.DataFrame({cond : size_factors.loc[cond][0] for cond in COND_NAMES}, index=('size_factor',))
-    try:
-        dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="parametric")
-        #gc.collect()
-    except RRuntimeError as e:
-        msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
-                    "We will try with fitType=\"local\"."])
-        warnings.warn(msg)
-        try:
-            dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="local")
-            #gc.collect()
-        except RRuntimeError as e:
-            msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
-                        "We will try with fitType=\"mean\"."])
-            warnings.warn(msg)
-            try:
-                dds = deseq2.estimateDispersions_DESeqDataSet(dds, fitType="mean")
-                #gc.collect()
-            except RRuntimeError as e:
-                msg = "".join(["Error occurred in estimateDispersions:\n%s\n" % e,
-                            "We give up."])
-                warnings.warn(msg)
-                #gc.collect()
-                raise
-    dds = deseq2.nbinomWaldTest(dds, betaPrior=deseq2_args["betaPrior"])
-    #gc.collect()
-    res = pandas2ri.ri2py(as_df(deseq2.results(
-        dds,
-        contrast=contrast,
-        addMLE=deseq2_args["addMLE"],
-        independentFiltering=deseq2_args["independentFiltering"])))
-    res.index = counts_data.index
-    return res, {cond : size_factors.loc[cond][0] for cond in cond_names}
--- a/libdeseq/setup.py
+++ b/libdeseq/setup.py
-from setuptools import setup, find_packages
-#from Cython.Build import cythonize
-
-name = "libdeseq"
-
-# Adapted from Biopython
-__version__ = "Undefined"
-for line in open("%s/__init__.py" % name):
-    if (line.startswith('__version__')):
-        exec(line.strip())
-
-
-setup(
-    name=name,
-    version=__version__,
-    description="Interfacing the call to DESEq2 with python.",
-    author="Blaise Li",
-    author_email="blaise.li@normalesup.org",
-    license="MIT",
-    packages=find_packages())
-    #ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
-    #install_requires=["cytoolz"],
-    #zip_safe=False