Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
libhts
Commits
96bef359
Commit
96bef359
authored
Feb 22, 2018
by
Blaise Li
Browse files
More robust median ratio to pseudo-ref.
Also made scatterplot axes style more configurable.
parent
eed9eae4
Changes
1
Hide whitespace changes
Inline
Side-by-side
libhts/libhts.py
View file @
96bef359
...
...
@@ -9,6 +9,7 @@ def formatwarning(message, category, filename, lineno, line):
warnings
.
formatwarning
=
formatwarning
import
numpy
as
np
import
pandas
as
pd
# To compute correlation coefficient, and compute linear regression
from
scipy.stats.stats
import
pearsonr
,
linregress
...
...
@@ -242,6 +243,27 @@ def do_deseq2(cond_names, conditions, counts_data,
return
res
,
{
cond
:
size_factors
.
loc
[
cond
][
0
]
for
cond
in
cond_names
}
## Not sure this is a good idea...
# def masked_gmean(a, axis=0, dtype=None):
# """Modified from stats.py."""
# # Converts the data into a masked array
# ma = np.ma.masked_invalid(a)
# # Apply gmean
# if not isinstance(ma, np.ndarray):
# # if not an ndarray object attempt to convert it
# log_a = np.log(np.array(ma, dtype=dtype))
# elif dtype:
# # Must change the default dtype allowing array type
# if isinstance(ma, np.ma.MaskedArray):
# log_a = np.log(np.ma.asarray(ma, dtype=dtype))
# else:
# log_a = np.log(np.asarray(ma, dtype=dtype))
# else:
# log_a = np.log(ma)
# return np.exp(log_a.mean(axis=axis))
def
median_ratio_to_pseudo_ref_size_factors
(
counts_data
):
"""Adapted from DESeq paper (doi:10.1186/gb-2010-11-10-r106)
All libraries are used to define a pseudo-reference, which has
...
...
@@ -252,11 +274,20 @@ def median_ratio_to_pseudo_ref_size_factors(counts_data):
#pseudo_ref = (counts_data + 1).apply(gmean, axis=1) - 1
# Ignore lines with zeroes instead (may be bad for IP: many zeroes expected):
pseudo_ref
=
(
counts_data
[
counts_data
.
prod
(
axis
=
1
)
>
0
]).
apply
(
gmean
,
axis
=
1
)
# Ignore lines with only zeroes
# pseudo_ref = (counts_data[counts_data.sum(axis=1) > 0]).apply(masked_gmean, axis=1)
def
median_ratio_to_pseudo_ref
(
col
):
return
(
col
/
pseudo_ref
).
median
()
#size_factors = counts_data.apply(median_ratio_to_pseudo_ref, axis=0)
return
counts_data
[
counts_data
.
prod
(
axis
=
1
)
>
0
].
apply
(
median_ratios
=
counts_data
[
counts_data
.
prod
(
axis
=
1
)
>
0
].
apply
(
median_ratio_to_pseudo_ref
,
axis
=
0
)
# Not sure fillna(0) is appropriate
if
any
(
median_ratios
.
isna
()):
msg
=
"Could not compute median ratios to pseudo reference.
\n
"
warnings
.
warn
(
msg
)
return
median_ratios
.
fillna
(
1
)
else
:
return
median_ratios
def
size_factor_correlations
(
counts_data
,
summaries
,
normalizer
):
...
...
@@ -442,7 +473,8 @@ def plot_scatter(data,
grouping
=
None
,
group2colour
=
None
,
x_range
=
None
,
y_range
=
None
):
y_range
=
None
,
axes_style
=
None
):
fig
,
ax
=
plt
.
subplots
()
# First plot the data in grey
data
.
plot
.
scatter
(
...
...
@@ -468,7 +500,7 @@ def plot_scatter(data,
if
group2colour
is
None
:
statuses
=
data
[
grouping
].
unique
()
group2colour
=
dict
(
zip
(
statuses
,
sns
.
color_palette
(
"colorblind"
,
len
(
statuses
))))
for
status
,
group
in
data
.
groupby
(
grouping
):
for
(
status
,
group
)
in
data
.
groupby
(
grouping
):
group
.
plot
.
scatter
(
x
=
x_column
,
y
=
y_column
,
s
=
1
,
c
=
group2colour
[
status
],
label
=
f
"
{
status
}
(
{
len
(
group
)
}
)"
,
ax
=
ax
)
...
...
@@ -486,8 +518,12 @@ def plot_scatter(data,
raise
else
:
warnings
.
warn
(
f
"Nothing to plot for
{
status
}
\n
"
)
ax
.
axhline
(
y
=
0
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
ax
.
axvline
(
x
=
0
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
if
axes_style
is
None
:
axes_style
=
{
"linewidth"
:
0.5
,
"color"
:
"0.5"
,
"linestyle"
:
"dashed"
}
ax
.
axhline
(
y
=
0
,
**
axes_style
)
ax
.
axvline
(
x
=
0
,
**
axes_style
)
# ax.axhline(y=0, linewidth=0.5, color="0.5", linestyle="dashed")
# ax.axvline(x=0, linewidth=0.5, color="0.5", linestyle="dashed")
# Set axis limits
if
x_range
is
not
None
:
(
x_min
,
x_max
)
=
x_range
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment