Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
bioinfo_utils
Commits
fdce587a
Commit
fdce587a
authored
May 13, 2019
by
Blaise Li
Browse files
Made libdeseq a submodule.
parent
08452c07
Changes
7
Hide whitespace changes
Inline
Side-by-side
.gitmodules
View file @
fdce587a
...
...
@@ -10,3 +10,6 @@
[submodule "libhts"]
path = libhts
url = git@gitlab.pasteur.fr:bli/libhts.git
[submodule "libdeseq"]
path = libdeseq
url = git@gitlab.pasteur.fr:bli/libdeseq.git
libdeseq
@
8df8009e
Subproject commit 8df8009e005d16234b2b985f63e83ed25d0fa759
libdeseq/.gitignore
deleted
100644 → 0
View file @
08452c07
# Compiled python modules.
*.pyc
# Setuptools distribution folder.
/dist/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
# Backups
*~
libdeseq/install.sh
deleted
100755 → 0
View file @
08452c07
#!/bin/sh
# Build and install the package with python3.6.
# Abort at the first failing step: without this, a failed build would still
# be followed by both pip invocations.
set -e

python3.6 setup.py build_ext
# .egg-link does not work with PYTHONPATH ?
# Hence the editable install is followed by a regular install of the
# package itself (dependencies untouched).
python3.6 -m pip install -e .
python3.6 -m pip install --no-deps --ignore-installed .
libdeseq/libdeseq/__init__.py
deleted
100644 → 0
View file @
08452c07
from
.libdeseq
import
(
do_deseq2
)
libdeseq/libdeseq/libdeseq.py
deleted
100644 → 0
View file @
08452c07
import
warnings
def formatwarning(message, category, filename, lineno, line=None):
    """Format a warning as a single ``filename:lineno: Category: message`` line.

    The signature mirrors the standard ``warnings.formatwarning`` hook,
    including the ``line=None`` default, so that code calling the hook with
    only four arguments keeps working once this function replaces it.

    Args:
        message: The warning message (or Warning instance).
        category: The warning class; only its ``__name__`` is used.
        filename: Name of the file the warning originates from.
        lineno: Line number the warning originates from.
        line: Source line text; accepted for compatibility and ignored.

    Returns:
        The formatted warning, terminated by a newline.
    """
    return "%s:%s: %s: %s\n" % (
        filename, lineno, category.__name__, message)
# Route every warning emitted from here on through the compact formatter.
warnings.formatwarning = formatwarning

import pandas as pd
from rpy2.robjects import (
    r,
    pandas2ri,
    Formula,
    StrVector,
)

# Handle on R's as.data.frame, applied to R results before they are
# converted to pandas objects.
as_df = r("as.data.frame")

from rpy2.rinterface import RRuntimeError
from rpy2.robjects.packages import importr

# The DESeq2 R package, imported once when this module is loaded.
deseq2 = importr("DESeq2")
#import gc
def _estimate_size_factors(dds, counts_data):
    """Estimate size factors with "ratio", falling back to "poscounts"."""
    try:
        return deseq2.estimateSizeFactors_DESeqDataSet(dds, type="ratio")
    except RRuntimeError as ratio_error:
        # The fallback only makes sense when every gene (row) has at least
        # one zero count.  Testing for zeros directly avoids the integer
        # overflow that a product of counts across columns can run into.
        if not (counts_data == 0).any(axis=1).all():
            raise
        msg = "".join([
            "Error occurred in estimateSizeFactors:\n%s\n" % ratio_error,
            "This is probably because every gene has at least one zero.\n",
            "We will try to use the \"poscounts\" method instead."])
        warnings.warn(msg)
        try:
            return deseq2.estimateSizeFactors_DESeqDataSet(
                dds, type="poscounts")
        except RRuntimeError as poscounts_error:
            msg = "".join([
                "Error occurred in estimateSizeFactors:\n%s\n" % poscounts_error,
                "We give up."])
            warnings.warn(msg)
            raise


def _estimate_dispersions(dds):
    """Estimate dispersions, trying "parametric", then "local", then "mean"."""
    fit_types = ("parametric", "local", "mean")
    for (fit_type, next_fit_type) in zip(fit_types, fit_types[1:] + (None,)):
        try:
            return deseq2.estimateDispersions_DESeqDataSet(
                dds, fitType=fit_type)
        except RRuntimeError as fit_error:
            if next_fit_type is None:
                msg = "".join([
                    "Error occurred in estimateDispersions:\n%s\n" % fit_error,
                    "We give up."])
                warnings.warn(msg)
                raise
            msg = "".join([
                "Error occurred in estimateDispersions:\n%s\n" % fit_error,
                "We will try with fitType=\"%s\"." % next_fit_type])
            warnings.warn(msg)


def do_deseq2(cond_names, conditions, counts_data,
              formula=None, contrast=None, deseq2_args=None):
    """Run a DESeq2 differential expression analysis.

    The analysis is decomposed into its steps (size factors, dispersions,
    Wald test, results) to have more control over the options and to fall
    back to alternative methods when a step fails.

    Args:
        cond_names: Sample names. They become the index of the sample
            description table and the keys of the returned size factors.
            They are expected to match the columns of *counts_data*
            (a warning is issued otherwise).
        conditions: Sample description, in any form accepted by
            ``pd.DataFrame``. If it has both a "lib" and a "treat" column,
            a combined "lib_treat" column is added, in case contrasts
            between factor combinations are wanted.
        counts_data: ``pd.DataFrame`` of counts, one column per sample.
        formula: Design formula. Defaults to ``Formula("~ lib")``.
        contrast: Contrast passed to DESeq2 ``results``, e.g.
            ``StrVector(["lib", mutant, reference])``. Required: no
            sensible default can be derived here.
        deseq2_args: Optional dict overriding any of the "betaPrior",
            "addMLE" and "independentFiltering" options (all default to
            True).

    Returns:
        A pair ``(res, size_factors)`` where *res* is the results table
        indexed like *counts_data* and *size_factors* maps each name in
        *cond_names* to its size factor.

    Raises:
        ValueError: If *contrast* is not provided.
        RRuntimeError: If a DESeq2 step fails and no fallback succeeds.
    """
    if contrast is None:
        # The former default referred to undefined names (MUT and REF) and
        # could only end in a NameError; fail early with a clear message.
        raise ValueError(
            "No contrast provided: pass `contrast` explicitly, "
            "e.g. StrVector([\"lib\", <mutant>, <reference>]).")
    if formula is None:
        formula = Formula("~ lib")
    # Merge user options over the defaults so that a partial dict works,
    # without modifying the caller's dict.
    options = {
        "betaPrior": True,
        "addMLE": True,
        "independentFiltering": True,
    }
    if deseq2_args is not None:
        options.update(deseq2_args)
    col_data = pd.DataFrame(conditions).assign(
        cond_name=pd.Series(cond_names).values).set_index("cond_name")
    # In case we want contrasts between factor combinations
    if ("lib" in col_data.columns) and ("treat" in col_data.columns):
        col_data = col_data.assign(
            lib_treat=[
                "%s_%s" % (lib, treat)
                for (lib, treat) in zip(col_data["lib"], col_data["treat"])])
    col_data_rownames = list(col_data.index)
    counts_data_colnames = list(counts_data.columns)
    if col_data_rownames != counts_data_colnames:
        warnings.warn(
            "The lines in the sample description do not match "
            "the columns in the counts table.\n"
            "Expect failures while loading data in DESeq2.\n")
    # http://stackoverflow.com/a/31206596/1878788
    pandas2ri.activate()  # makes some conversions automatic
    dds = deseq2.DESeqDataSetFromMatrix(
        countData=counts_data,
        colData=col_data,
        design=formula)
    dds = _estimate_size_factors(dds, counts_data)
    size_factors = pandas2ri.ri2py(
        as_df(deseq2.sizeFactors_DESeqDataSet(dds)))
    dds = _estimate_dispersions(dds)
    dds = deseq2.nbinomWaldTest(dds, betaPrior=options["betaPrior"])
    res = pandas2ri.ri2py(
        as_df(deseq2.results(
            dds,
            contrast=contrast,
            addMLE=options["addMLE"],
            independentFiltering=options["independentFiltering"])))
    # Label the result rows like the rows of the input counts table.
    res.index = counts_data.index
    return res, {cond: size_factors.loc[cond][0] for cond in cond_names}
libdeseq/setup.py
deleted
100644 → 0
View file @
08452c07
from setuptools import setup, find_packages
#from Cython.Build import cythonize

name = "libdeseq"

# Adapted from Biopython
# The version is read from the package's __init__.py, so that it is defined
# in one place only.  It stays "Undefined" when no __version__ line is found.
__version__ = "Undefined"
# Use a context manager so that the file is closed once it has been scanned.
with open("%s/__init__.py" % name) as init_file:
    for line in init_file:
        if line.startswith('__version__'):
            # NOTE(review): exec runs a line of the package's own source
            # file; acceptable for trusted sources only.
            exec(line.strip())

setup(
    name=name,
    version=__version__,
    description="Interfacing the call to DESEq2 with python.",
    author="Blaise Li",
    author_email="blaise.li@normalesup.org",
    license="MIT",
    packages=find_packages())
    #ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
    #install_requires=["cytoolz"],
    #zip_safe=False
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment