Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
bioinfo_utils
Commits
c04ad30a
Commit
c04ad30a
authored
May 13, 2019
by
Blaise Li
Browse files
Log and save selection of lfc-lfc plots.
parent
1d91b2cc
Changes
1
Hide whitespace changes
Inline
Side-by-side
plot_lfclfc_scatter.py
View file @
c04ad30a
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
"""This script reads data from "tidy" files and makes plots out of
it, at the same scale."""
it, at the same scale.
It also outputs a table containing the plotted data points."""
import
argparse
import
os
import
sys
import
warnings
from
operator
import
attrgetter
,
contains
# from functools import partial
import
matplotlib
as
mpl
# To be able to run the script without a defined $DISPLAY
# mpl.use("PDF")
from
matplotlib.backends.backend_pgf
import
FigureCanvasPgf
import
pandas
as
pd
from
cytoolz
import
co
ncat
from
cytoolz
import
co
mpose
,
concat
,
curry
from
libhts
import
plot_scatter
from
libworkflows
import
save_plot
,
strip_split
...
...
@@ -86,8 +89,7 @@ class Scatterplot:
y_input_file
,
x_column
,
y_column
,
x_label
,
y_label
,
labels
,
extra_cols
=
None
):
if
extra_cols
is
None
:
x_usecols
=
[
"gene"
,
x_column
].
__contains__
...
...
@@ -127,8 +129,7 @@ class Scatterplot:
self
.
grouping_col
=
"_"
.
join
(
extra_cols
)
else
:
self
.
grouping_col
=
None
self
.
x_label
=
x_label
self
.
y_label
=
y_label
(
self
.
x_label
,
self
.
y_label
)
=
labels
def
apply_selector
(
self
,
selector
,
chose_from
=
None
):
"""Returns a list of gene ids based on a *selector* string.
...
...
@@ -142,8 +143,7 @@ class Scatterplot:
if
chose_from
:
return
[
gene_id
for
gene_id
in
self
.
data
.
query
(
selector
).
index
if
gene_id
in
chose_from
]
else
:
return
[
gene_id
for
gene_id
in
self
.
data
.
query
(
selector
).
index
]
return
[
gene_id
for
gene_id
in
self
.
data
.
query
(
selector
).
index
]
def
plot_maker
(
self
,
grouping
=
None
,
group2colour
=
None
,
**
kwargs
):
"""Builds a plotting function that can colour dots based on them
...
...
@@ -153,12 +153,19 @@ class Scatterplot:
*save_plot* can include it in the bounding box."""
# fig, axis = plot_scatter(
# print(kwargs["x_range"])
axis
=
plot_scatter
(
self
.
data
,
"x"
,
"y"
,
grouping
=
grouping
,
group2colour
=
group2colour
,
**
kwargs
)
try
:
axis
=
plot_scatter
(
self
.
data
,
"x"
,
"y"
,
grouping
=
grouping
,
group2colour
=
group2colour
,
**
kwargs
)
except
ValueError
as
err
:
if
str
(
err
)
==
"No data to plot."
:
warnings
.
warn
(
"No data to plot."
)
return
None
else
:
raise
# Lines indicating 2-fold threshold.
# Assumes the data are in log2 fold changes
line_style
=
{
...
...
@@ -281,8 +288,7 @@ class Scatterplot:
bbox_to_anchor
=
(
0
,
1
),
bbox_transform
=
axis
.
transAxes
,
loc
=
"lower left"
)
return
legend
,
else
:
return
None
return
None
# Not working:
# fig.tight_layout()
# strange behaviour (interaction with tight_layout?)
...
...
@@ -351,10 +357,10 @@ def main():
parser
.
add_argument
(
"--data_range"
,
help
=
"min and max of the data values to display. "
"If the range is to narrow to plot data, it will be ignored."
,
type
=
int
,
nargs
=
2
,
default
=
[
-
12
,
12
])
"If the range is to
o
narrow to plot data, it will be ignored."
,
type
=
int
,
nargs
=
2
,
default
=
[
-
12
,
12
])
parser
.
add_argument
(
"--extra_cols"
,
help
=
"Columns containing categorical information "
...
...
@@ -405,8 +411,7 @@ def main():
args
.
y_input_data
,
args
.
x_column
,
args
.
y_column
,
args
.
x_label
,
args
.
y_label
,
(
args
.
x_label
,
args
.
y_label
),
extra_cols
=
args
.
extra_cols
)
# if args.transform == "log2":
# transform = 2
...
...
@@ -425,14 +430,29 @@ def main():
assert
args
.
selection_label
,
msg
gene_list
=
plot_data
.
apply_selector
(
args
.
selector
,
base_gene_list
)
else
:
gene_list
=
base_gene_list
gene_list
=
list
(
plot_data
.
data
.
index
.
intersection
(
base_gene_list
))
if
args
.
plot_name
:
# https://stackoverflow.com/a/14364249/1878788
os
.
makedirs
(
args
.
plot_dir
,
exist_ok
=
True
)
out_pdf
=
OPJ
(
args
.
plot_dir
,
"%s.pdf"
%
args
.
plot_name
)
out_table
=
OPJ
(
args
.
plot_dir
,
"%s.tsv"
%
args
.
plot_name
)
out_log
=
OPJ
(
args
.
plot_dir
,
"%s.log"
%
args
.
plot_name
)
with
open
(
out_log
,
"w"
)
as
log_file
:
print
(
"
\\\n\t
"
.
join
(
sys
.
argv
),
file
=
log_file
)
if
gene_list
and
args
.
selection_label
:
plot_data
.
data
.
assign
(
hightlighted
=
plot_data
.
data
.
apply
(
# apply takes a function of row
# get the row name
# check whether it belongs to gene_list
compose
(
curry
(
contains
)(
gene_list
),
attrgetter
(
"name"
)),
axis
=
1
)).
to_csv
(
out_table
,
sep
=
"
\t
"
)
list_name
=
args
.
selection_label
# if args.gene_list:
# if args.selection_label:
...
...
@@ -455,6 +475,7 @@ def main():
"linewidth"
:
0.5
,
"color"
:
"0.5"
,
"linestyle"
:
"-"
},
regression
=
args
.
plot_regression
)
else
:
plot_data
.
data
.
to_csv
(
out_table
,
sep
=
"
\t
"
)
plot_data
.
save_plot
(
# args.x_axis, args.y_axis,
out_pdf
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment