Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
libhts
Commits
f2d5d8f0
Commit
f2d5d8f0
authored
Dec 06, 2017
by
Blaise Li
Browse files
Script to make lfc scatterplots.
parent
d451a2a5
Changes
2
Hide whitespace changes
Inline
Side-by-side
libhts/__init__.py
View file @
f2d5d8f0
from
.libhts
import
do_deseq2
,
gtf_2_genes_exon_lengths
,
median_ratio_to_pseudo_ref_size_factors
,
plot_boxplots
,
plot_counts_distribution
,
plot_lfc_distribution
,
plot_MA
,
plot_norm_correlations
,
repeat_bed_2_lengths
,
size_factor_correlations
,
status_setter
from
.libhts
import
do_deseq2
,
gtf_2_genes_exon_lengths
,
median_ratio_to_pseudo_ref_size_factors
,
plot_boxplots
,
plot_counts_distribution
,
plot_lfc_distribution
,
plot_MA
,
plot_norm_correlations
,
plot_scatter
,
repeat_bed_2_lengths
,
size_factor_correlations
,
status_setter
libhts/libhts.py
View file @
f2d5d8f0
...
...
@@ -10,8 +10,8 @@ def formatwarning(message, category, filename, lineno, line):
warnings
.
formatwarning
=
formatwarning
import
pandas
as
pd
# To compute correlation coefficient
from
scipy.stats.stats
import
pearsonr
# To compute correlation coefficient
, and compute linear regression
from
scipy.stats.stats
import
pearsonr
,
linregress
# To compute geometric mean
from
scipy.stats.mstats
import
gmean
import
matplotlib.pyplot
as
plt
...
...
@@ -150,6 +150,7 @@ def do_deseq2(cond_names, conditions, counts_data,
if
formula
is
None
:
formula
=
Formula
(
"~ lib"
)
if
contrast
is
None
:
# FIXME: MUT and REF are not defined
contrast
=
StrVector
([
"lib"
,
MUT
,
REF
])
if
deseq2_args
is
None
:
deseq2_args
=
{
"betaPrior"
:
True
,
"addMLE"
:
True
,
"independentFiltering"
:
True
}
...
...
@@ -374,22 +375,27 @@ def plot_MA(res,
This may not be good for genes with low expression levels."""
fig
,
ax
=
plt
.
subplots
()
# Make a column indicating whether the gene is DE or NS
res
=
res
.
assign
(
is_DE
=
res
.
apply
(
set_de_status
,
axis
=
1
))
data
=
res
.
assign
(
is_DE
=
res
.
apply
(
set_de_status
,
axis
=
1
))
if
fold_type
is
None
:
fold_type
=
"log2FoldChange"
# First plot the data in grey and black
for
de_status
,
group
in
res
.
groupby
(
"is_DE"
):
for
de_status
,
group
in
data
.
groupby
(
"is_DE"
):
group
.
plot
.
scatter
(
x
=
"baseMean"
,
y
=
fold_type
,
s
=
2
,
logx
=
True
,
c
=
DE2COLOUR
[
de_status
],
label
=
f
"
{
de_status
}
(
{
len
(
group
)
}
)"
,
ax
=
ax
)
if
grouping
is
not
None
:
if
isinstance
(
grouping
,
str
):
# Overlay colours based on the "grouping" column
if
group2colour
is
None
:
group2colour
=
STATUS2COLOUR
for
status
,
group
in
res
.
groupby
(
grouping
):
group
.
plot
.
scatter
(
x
=
"baseMean"
,
y
=
fold_type
,
s
=
1
,
logx
=
True
,
c
=
group2colour
[
status
],
label
=
f
"
{
status
}
(
{
len
(
group
)
}
)"
,
ax
=
ax
)
for
status
,
group
in
data
.
groupby
(
grouping
):
group
.
plot
.
scatter
(
x
=
"baseMean"
,
y
=
fold_type
,
s
=
1
,
logx
=
True
,
c
=
group2colour
[
status
],
label
=
f
"
{
status
}
(
{
len
(
group
)
}
)"
,
ax
=
ax
)
else
:
(
status
,
colour
)
=
group2colour
res
.
ix
[
grouping
].
plot
.
scatter
(
x
=
"baseMean"
,
y
=
fold_type
,
s
=
1
,
logx
=
True
,
c
=
colour
,
label
=
f
"
{
status
}
(
{
len
(
grouping
)
}
)"
,
ax
=
ax
)
row_indices
=
data
.
index
.
intersection
(
grouping
)
data
.
ix
[
row_indices
].
plot
.
scatter
(
x
=
"baseMean"
,
y
=
fold_type
,
s
=
1
,
logx
=
True
,
c
=
colour
,
label
=
f
"
{
status
}
(
{
len
(
row_indices
)
}
)"
,
ax
=
ax
)
ax
.
axhline
(
y
=
1
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
ax
.
axhline
(
y
=-
1
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
# TODO: check data basemean range
...
...
@@ -397,10 +403,71 @@ def plot_MA(res,
ax
.
set_xlim
(
mean_range
)
if
lfc_range
is
not
None
:
(
lfc_min
,
lfc_max
)
=
lfc_range
lfc_here_min
=
getattr
(
res
,
fold_type
).
min
()
lfc_here_max
=
getattr
(
res
,
fold_type
).
max
()
lfc_here_min
=
getattr
(
data
,
fold_type
).
min
()
lfc_here_max
=
getattr
(
data
,
fold_type
).
max
()
if
(
lfc_here_min
<
lfc_min
)
or
(
lfc_here_max
>
lfc_max
):
warnings
.
warn
(
f
"Cannot plot
{
fold_type
}
data ([
{
lfc_here_min
}
,
{
lfc_here_max
}
]) in requested range ([
{
lfc_min
}
,
{
lfc_max
}
])
\n
"
)
else
:
ax
.
set_ylim
(
lfc_range
)
def
plot_scatter
(
data
,
x_column
,
y_column
,
regression
=
False
,
grouping
=
None
,
group2colour
=
None
,
x_range
=
None
,
y_range
=
None
):
fig
,
ax
=
plt
.
subplots
()
# First plot the data in grey
data
.
plot
.
scatter
(
x
=
x_column
,
y
=
y_column
,
s
=
2
,
c
=
"lightgray"
,
ax
=
ax
)
if
regression
:
linreg
=
linregress
(
data
[[
x_column
,
y_column
]].
dropna
())
a
=
linreg
.
slope
b
=
linreg
.
intercept
def
fit
(
x
):
return
(
a
*
x
)
+
b
min_x
=
data
[[
x_column
]].
min
()[
0
]
max_x
=
data
[[
x_column
]].
max
()[
0
]
min_y
=
fit
(
min_x
)
max_y
=
fit
(
max_x
)
xfit
,
yfit
=
(
min_x
,
max_x
),
(
min_y
,
max_y
)
ax
.
plot
(
xfit
,
yfit
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
# Overlay colour points
if
grouping
is
not
None
:
if
isinstance
(
grouping
,
str
):
# Determine colours based on the "grouping" column
if
group2colour
is
None
:
statuses
=
data
[
grouping
].
unique
()
group2colour
=
dict
(
zip
(
statuses
,
sns
.
color_palette
(
"colorblind"
,
len
(
statuses
))))
for
status
,
group
in
data
.
groupby
(
grouping
):
group
.
plot
.
scatter
(
x
=
x_column
,
y
=
y_column
,
s
=
1
,
c
=
group2colour
[
status
],
label
=
f
"
{
status
}
(
{
len
(
group
)
}
)"
,
ax
=
ax
)
else
:
# Apply a coulour to a list of genes
(
status
,
colour
)
=
group2colour
row_indices
=
data
.
index
.
intersection
(
grouping
)
data
.
ix
[
row_indices
].
plot
.
scatter
(
x
=
x_column
,
y
=
y_column
,
s
=
1
,
c
=
colour
,
label
=
f
"
{
status
}
(
{
len
(
row_indices
)
}
)"
,
ax
=
ax
)
ax
.
axhline
(
y
=
0
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
ax
.
axvline
(
x
=
0
,
linewidth
=
0.5
,
color
=
"0.5"
,
linestyle
=
"dashed"
)
# Set axis limits
if
x_range
is
not
None
:
(
x_min
,
x_max
)
=
x_range
x_here_min
=
getattr
(
data
,
x_column
).
min
()
x_here_max
=
getattr
(
data
,
x_column
).
max
()
if
(
x_here_min
<
x_min
)
or
(
x_here_max
>
x_max
):
warnings
.
warn
(
f
"Cannot plot
{
x_column
}
data ([
{
x_here_min
}
,
{
x_here_max
}
]) in requested range ([
{
x_min
}
,
{
x_max
}
])
\n
"
)
else
:
ax
.
set_xlim
(
x_range
)
if
y_range
is
not
None
:
(
y_min
,
y_max
)
=
y_range
y_here_min
=
getattr
(
data
,
y_column
).
min
()
y_here_max
=
getattr
(
data
,
y_column
).
max
()
if
(
y_here_min
<
y_min
)
or
(
y_here_max
>
y_max
):
warnings
.
warn
(
f
"Cannot plot
{
y_column
}
data ([
{
y_here_min
}
,
{
y_here_max
}
]) in requested range ([
{
y_min
}
,
{
y_max
}
])
\n
"
)
else
:
ax
.
set_ylim
(
y_range
)
return
ax
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment