Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
bioinfo_utils
Commits
a1725563
Commit
a1725563
authored
May 13, 2019
by
Blaise Li
Browse files
Moved plotting scripts to a submodule.
parent
15d0feef
Changes
4
Hide whitespace changes
Inline
Side-by-side
.gitmodules
View file @
a1725563
...
...
@@ -16,3 +16,6 @@
[submodule "libsmallrna"]
path = libsmallrna
url = git@gitlab.pasteur.fr:bli/libsmallrna.git
[submodule "plotting_scripts"]
path = plotting_scripts
url = git@gitlab.pasteur.fr:bli/plotting_scripts.git
plotting_scripts
@
c423fc12
Subproject commit c423fc12a58f7adca64ef2c6f9afd55162bb5dab
plotting_scripts/create_metagene_profile.py
deleted
100755 → 0
View file @
15d0feef
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
"""
This script reads bigwig files, and creates a metagene profile.
The profiles can be restricted based on a list of gene identifiers.
"""
# TODO: Set font size or graph width to avoid very large titles (also for the pipeline) ?
# TODO: What about using deeptools?
# deeptools.countReadsPerBin.CountReadsPerBin, using:
# - bedFile, with the gene spans
# - blackListFileName, with the introns
# The above should count reads in the exonic part.
# TODO: Check this is true: one count per region in bedFile
# Or use count_reads_in_region twice, and compute differences?
# Then scale this to the gene exonic length
# This might correspond to the following computeMatrix scale-regions option:
# --blackListFileName BED file, -bl BED file
# A BED file containing regions that should be excluded
# from all analyses. Currently this works by rejecting
# genomic chunks that happen to overlap an entry.
# Consequently, for BAM files, if a read partially
# overlaps a blacklisted region or a fragment spans over
# it, then the read/fragment might still be considered.
#
# See also --metagene option:
# GTF/BED12 options:
# --metagene When either a BED12 or GTF file are used to provide
# regions, perform the computation on the merged exons,
# rather than using the genomic interval defined by the
# 5-prime and 3-prime most transcript bound (i.e.,
# columns 2 and 3 of a BED file). If a BED3 or BED6 file
# is used as input, then columns 2 and 3 are used as an
# exon. (default: False)
# --transcriptID TRANSCRIPTID
# When a GTF file is used to provide regions, only
# entries with this value as their feature (column 2)
# will be processed as transcripts. (default:
# transcript)
# --exonID EXONID When a GTF file is used to provide regions, only
# entries with this value as their feature (column 2)
# will be processed as exons. CDS would be another
# common value for this. (default: exon)
# --transcript_id_designator TRANSCRIPT_ID_DESIGNATOR
# Each region has an ID (e.g., ACTB) assigned to it,
# which for BED files is either column 4 (if it exists)
# or the interval bounds. For GTF files this is instead
# stored in the last column as a key:value pair (e.g.,
# as 'transcript_id "ACTB"', for a key of transcript_id
# and a value of ACTB). In some cases it can be
# convenient to use a different identifier. To do so,
# set this to the desired key. (default: transcript_id)
import
argparse
import
os
import
sys
import
warnings
from
copy
import
deepcopy
from
pathlib
import
Path
from
tempfile
import
NamedTemporaryFile
from
collections
import
defaultdict
# from itertools import chain
from
sqlite3
import
OperationalError
from
yaml
import
load
as
yload
# https://pythonhosted.org/gffutils/
from
gffutils
import
FeatureDB
,
create_db
from
deeptools
import
heatmapper
from
deeptools.plotProfile
import
Profile
from
cytoolz
import
keyfilter
,
valmap
def
formatwarning
(
message
,
category
,
filename
,
lineno
,
line
):
"""Format warning messages."""
return
"%s:%s: %s: %s
\n
"
%
(
filename
,
lineno
,
category
.
__name__
,
message
)
warnings
.
formatwarning
=
formatwarning
def
open_feature_db
(
db_filename
,
annot_filename
=
None
,
force_new
=
False
,
**
kwd_args
):
"""
Open a :class:`gffutil.FeatureDB` stored as *db_filename*.
If *annot_filename* is provided, it can be used to create *db_filename* in
case this file does not exist.
If *force_new* is set to True, the database will be re-created even if
*db_filename* already exists.
*kwd_args* are extra keyword arguments to be passed to
:func:`gffutils.create_db`.
"""
# set default annotation file name, if necessary
if
annot_filename
is
None
:
annot_filename
=
"/pasteur/entites/Mhe/Genomes/"
\
"C_elegans/Caenorhabditis_elegans/Ensembl/"
\
"WBcel235/Annotation/Genes/genes.gtf"
def
do_create_db
():
"""Create the feature database."""
assert
Path
(
annot_filename
).
exists
(),
\
f
"
{
annot_filename
}
is not found.
\n
"
\
"Cannot create a FeatureDB without a source of annotations."
print
(
"Creating feature database. This may take time."
,
file
=
sys
.
stderr
)
create_db
(
annot_filename
,
dbfn
=
db_filename
,
force
=
True
,
**
kwd_args
)
if
force_new
:
do_create_db
()
try
:
fdb
=
FeatureDB
(
db_filename
)
except
(
ValueError
,
OperationalError
)
as
open_err
:
warnings
.
warn
(
f
"Cannot open
{
db_filename
}
:
\n
"
f
"
{
str
(
open_err
)
}
\n
"
"Trying to re-create it."
)
do_create_db
()
fdb
=
FeatureDB
(
db_filename
)
return
fdb
def
iter_transcripts
(
fdb
,
genes
=
None
):
"""
Iterate over the transcripts in *fdb*.
If *gene* is not None, only the transcripts of genes whose `gene_id`
attribute are in *genes* are yielded.
"""
if
genes
is
not
None
:
for
transcript
in
fdb
.
all_features
(
featuretype
=
"transcript"
):
[
gene_id
]
=
transcript
[
"gene_id"
]
if
gene_id
in
genes
:
yield
transcript
else
:
yield
from
fdb
.
all_features
(
featuretype
=
"transcript"
)
def
merged_transcripts
(
fdb
,
genes
=
None
):
"""
Iterate over the transcripts in *fdb*, merged per `gene_id`.
The merge results in the loss of the `attributes` attribute,
except `gene_id`.
If *gene* is not None, only the transcripts of genes whose `gene_id`
attribute are in *genes* are yielded.
"""
transcript_dict
=
defaultdict
(
list
)
for
transcript
in
fdb
.
all_features
(
featuretype
=
"transcript"
):
[
gene_id
]
=
transcript
[
"gene_id"
]
if
genes
is
None
or
gene_id
in
genes
:
transcript_dict
[
gene_id
].
append
(
transcript
)
# return chain.from_iterable(valmap(fdb.merge, transcript_dict).values())
for
(
gene_id
,
(
feature
,))
in
valmap
(
fdb
.
merge
,
transcript_dict
).
items
():
# Restore the gene_id attribute
feature
.
attributes
[
"gene_id"
]
=
gene_id
yield
feature
def
feature2bed
(
feature
):
"""Converts a :class:`gffutils.feature.Feature` *feature*
into a bed-formatted line."""
try
:
[
gene_id
]
=
feature
.
attributes
[
"gene_id"
]
except
KeyError
:
gene_id
=
"."
return
"
\t
"
.
join
([
feature
.
chrom
,
str
(
feature
.
start
-
1
),
str
(
feature
.
stop
),
gene_id
,
feature
.
score
,
feature
.
strand
])
def
bedfile_from_gene_list
(
fdb
,
genes
):
"""Create a temporary bed file for the merged transcripts corresponding
to gene ids listed in *genes*, and using the :class:`gffutil.FeatureDB`
*fdb*
The name of the bed file is returned.
After using it, it should be deleted using os.unlink(bedfile_name)"""
with
NamedTemporaryFile
(
mode
=
"w"
,
delete
=
False
)
as
bedfile
:
print
(
*
map
(
feature2bed
,
merged_transcripts
(
fdb
,
genes
)),
sep
=
"
\n
"
,
file
=
bedfile
)
return
bedfile
.
name
def
fix_none
(
param_value
):
"""Convert "None" as string into real Python None.
This can be used to fix values in a yaml loaded dictionary."""
if
param_value
==
"None"
:
return
None
return
param_value
DEFAULT_PARAMETERS
=
{
"upstream"
:
0
,
"downstream"
:
0
,
# TODO: change to 2000 ?
"body"
:
500
,
"bin size"
:
10
,
"ref point"
:
None
,
"verbose"
:
False
,
"bin avg type"
:
"mean"
,
"missing data as zero"
:
False
,
"min threshold"
:
None
,
"max threshold"
:
None
,
"scale"
:
1
,
"skip zeros"
:
False
,
"nan after end"
:
False
,
"proc number"
:
4
,
"sort regions"
:
"keep"
,
"sort using"
:
"mean"
,
"unscaled 5 prime"
:
0
,
"unscaled 3 prime"
:
0
,
"start_label"
:
"start"
,
"end_label"
:
"end"
,
"label_rotation"
:
90
,
}
PARAMETER_INFO
=
"
\n
"
.
join
(
DEFAULT_PARAMETERS
.
keys
())
def
is_prof_param
(
key
):
"""Determine if *key* corresponds to a valid parameter for a *Profile*."""
return
key
in
{
"plot_title"
,
"y_axis_label"
,
"y_min"
,
"y_max"
,
"averagetype"
,
"reference_point_label"
,
"start_label"
,
"end_label"
,
"plot_height"
,
"plot_width"
,
"per_group"
,
"plot_type"
,
"image_format"
,
"color_list"
,
"legend_location"
,
"plots_per_row"
,
"label_rotation"
,
"dpi"
}
def
compute_matrix
(
bigwig_filenames
,
bed_filename
,
plot_filename
=
None
,
**
extra_parameters
):
"""Combine information from bigwig files *bigwig_filenames* and bed file
*bed_filename*.
If *plot_filename* is set, write the corresponding meta profile
in this file.
"""
parameters
=
deepcopy
(
DEFAULT_PARAMETERS
)
parameters
.
update
(
extra_parameters
)
heatm
=
heatmapper
.
heatmapper
()
heatm
.
computeMatrix
(
bigwig_filenames
,
bed_filename
,
parameters
)
if
"sample_labels"
in
parameters
:
heatm
.
matrix
.
set_sample_labels
(
parameters
[
"sample_labels"
])
if
"group_labels"
in
parameters
:
heatm
.
matrix
.
set_group_labels
(
parameters
[
"group_labels"
])
# Fixing parameters (as in heatmapper.read_matrix_file
# and heatmapper.save_matrix)
nb_samples
=
len
(
heatm
.
matrix
.
sample_labels
)
hm_params
=
dict
()
for
(
key
,
val
)
in
heatm
.
parameters
.
items
():
if
isinstance
(
val
,
list
)
and
not
val
:
val
=
None
if
key
in
heatm
.
special_params
and
not
isinstance
(
val
,
list
):
val
=
[
val
]
*
nb_samples
if
not
val
:
val
=
[
None
]
*
nb_samples
hm_params
[
key
]
=
val
heatm
.
parameters
=
hm_params
if
plot_filename
is
not
None
:
print
(
f
"plotting profile to
{
plot_filename
}
"
)
prof_params
=
keyfilter
(
is_prof_param
,
parameters
)
prof_params
[
"per_group"
]
=
True
prof
=
Profile
(
heatm
,
plot_filename
,
**
prof_params
)
prof
.
plot_profile
()
# def get_transcript_structure(fdb, transcript):
# [t_id] = transcript["transcript_id"]
# return list(chain(
# fdb.children(t_id, featuretype="CDS"),
# fdb.children(t_id, featuretype="UTR")))
def
main
():
"""Main function of the program."""
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
)
parser
.
add_argument
(
"-d"
,
"--feature_db"
,
default
=
"/pasteur/entites/Mhe/Genomes/WBcel235_genes.db"
,
help
=
"Path to the sqlite database to read or create.
\n
"
"If you want to create it, also provide annotations "
"with option *--annot_file*."
)
parser
.
add_argument
(
"-a"
,
"--annot_file"
,
help
=
"Path to the gtf file containing annotations."
)
parser
.
add_argument
(
"-g"
,
"--gene_list"
,
help
=
"Path to a file containing one gene identifier per line.
\n
"
"The metagene profile will only take into account those genes.
\n
"
"This should not be used at the same time as option -c"
)
parser
.
add_argument
(
"-c"
,
"--bed_coordinates"
,
help
=
"Path to a file containing the coordinates of the features "
"for which to create the meta-profiles.
\n
"
"This should not be used at the same time as option -g"
)
parser
.
add_argument
(
"-b"
,
"--bigwig_files"
,
nargs
=
"+"
,
help
=
"Paths to bigwig files that should be used "
"to represent meta profiles."
)
parser
.
add_argument
(
"-l"
,
"--library_labels"
,
nargs
=
"+"
,
help
=
"Names of the libraries. These should be in the same order "
"as the files given at the -b option."
)
parser
.
add_argument
(
"-p"
,
"--profile_plot"
,
help
=
"Path to the pdf file in wich to write the meta profiles."
)
parser
.
add_argument
(
"-t"
,
"--plot_title"
,
default
=
"Metagene profile"
,
help
=
"Title to use for the metagene plot."
)
parser
.
add_argument
(
"--yMin"
,
default
=
None
,
help
=
"Minimum value for the Y-axis"
)
parser
.
add_argument
(
"--yMax"
,
default
=
None
,
help
=
"Maximum value for the Y-axis"
)
parser
.
add_argument
(
"-e"
,
"--extra_parameters"
,
help
=
"Path to a yaml file containing parameters for the "
"*compute_matrix* function.
\n
"
"possible values are:
\n
"
+
PARAMETER_INFO
)
args
=
parser
.
parse_args
()
if
args
.
extra_parameters
:
with
open
(
args
.
extra_parameters
,
"r"
)
as
params_file
:
extra_parameters
=
valmap
(
fix_none
,
yload
(
params_file
))
else
:
extra_parameters
=
{}
if
args
.
plot_title
:
extra_parameters
[
"group_labels"
]
=
[
args
.
plot_title
]
if
args
.
library_labels
:
extra_parameters
[
"sample_labels"
]
=
args
.
library_labels
if
args
.
yMin
is
not
None
:
extra_parameters
[
"y_min"
]
=
[
float
(
args
.
yMin
)]
if
args
.
yMax
is
not
None
:
extra_parameters
[
"y_max"
]
=
[
float
(
args
.
yMax
)]
if
args
.
profile_plot
:
check_options_msg
=
"Use only one among options -c and -g."
if
args
.
gene_list
:
assert
not
args
.
bed_coordinates
,
check_options_msg
with
open
(
args
.
gene_list
,
"r"
)
as
gene_list_file
:
gene_ids
=
{
gene_id_line
.
strip
()
for
gene_id_line
in
gene_list_file
}
fdb
=
open_feature_db
(
args
.
feature_db
,
args
.
annot_file
,
disable_infer_transcripts
=
True
)
bed_filename
=
bedfile_from_gene_list
(
fdb
,
gene_ids
)
print
(
f
"temporary bed file:
{
bed_filename
}
"
)
if
args
.
bed_coordinates
:
assert
not
args
.
gene_list
,
check_options_msg
bed_filename
=
args
.
bed_coordinates
print
(
f
"using bed file:
{
bed_filename
}
"
)
compute_matrix
(
args
.
bigwig_files
,
bed_filename
,
plot_filename
=
args
.
profile_plot
,
**
extra_parameters
)
if
args
.
gene_list
:
os
.
unlink
(
bed_filename
)
return
0
if
__name__
==
"__main__"
:
sys
.
exit
(
main
())
plotting_scripts/plot_lfclfc_scatter.py
deleted
100755 → 0
View file @
15d0feef
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
"""This script reads data from "tidy" files and makes plots out of
it, at the same scale.
It also outputs a table containing the plotted data points."""
import
argparse
import
os
import
sys
import
warnings
from
operator
import
attrgetter
,
contains
# from functools import partial
import
matplotlib
as
mpl
# To be able to run the script without a defined $DISPLAY
# mpl.use("PDF")
from
matplotlib.backends.backend_pgf
import
FigureCanvasPgf
import
pandas
as
pd
from
cytoolz
import
compose
,
concat
,
curry
from
libhts
import
plot_scatter
from
libworkflows
import
save_plot
,
strip_split
def
formatwarning
(
message
,
category
,
filename
,
lineno
,
line
):
"""Used to format warning messages."""
return
"%s:%s: %s: %s
\n
"
%
(
filename
,
lineno
,
category
.
__name__
,
message
)
warnings
.
formatwarning
=
formatwarning
OPJ
=
os
.
path
.
join
OPB
=
os
.
path
.
basename
# https://stackoverflow.com/a/42768093/1878788
mpl
.
backend_bases
.
register_backend
(
'pdf'
,
FigureCanvasPgf
)
TEX_PARAMS
=
{
"text.usetex"
:
True
,
# use LaTeX to write all text
"pgf.rcfonts"
:
False
,
# Ignore Matplotlibrc
"pgf.texsystem"
:
"lualatex"
,
# hoping to avoid memory issues
"pgf.preamble"
:
[
r
'\usepackage{color}'
# xcolor for colours
]
}
mpl
.
rcParams
.
update
(
TEX_PARAMS
)
# mpl.rcParams["figure.figsize"] = 2, 4
mpl
.
rcParams
[
"font.sans-serif"
]
=
[
"Arial"
,
"Liberation Sans"
,
"Bitstream Vera Sans"
]
mpl
.
rcParams
[
"font.family"
]
=
"sans-serif"
# from matplotlib import numpy as np
# import matplotlib.pyplot as plt
# https://jakevdp.github.io/blog/2014/10/16/how-bad-is-your-colormap/
# def grayify_cmap(cmap):
# """Return a grayscale version of the colormap"""
# cmap = plt.cm.get_cmap(cmap)
# colors = cmap(np.arange(cmap.N))
# # convert RGBA to perceived greyscale luminance
# # cf. http://alienryderflex.com/hsp.html
# rgb_weight = [0.299, 0.587, 0.114]
# luminance = np.sqrt(np.dot(colors[:, :3] ** 2, rgb_weight))
# colors[:, :3] = luminance[:, np.newaxis]
# return cmap.from_list(cmap.name + "_grayscale", colors, cmap.N)
def
get_gene_list
(
filename
):
"""Given the path to a file containing gene identifiers,
extracts the identifiers and the base name of the file.
Returns a pair (list_name, gene_list)."""
(
list_name
,
_
)
=
os
.
path
.
splitext
(
OPB
(
filename
))
if
list_name
[
-
4
:]
==
"_ids"
:
list_name
=
list_name
[:
-
4
]
with
open
(
filename
,
"r"
)
as
infile
:
gene_list
=
[
strip_split
(
line
)[
0
]
for
line
in
infile
.
readlines
()
if
line
[
0
]
!=
"#"
]
return
(
list_name
,
gene_list
)
class
Scatterplot
:
"""A 2-dimension scatterplot."""
__slots__
=
(
"data"
,
"x_label"
,
"y_label"
,
"grouping_col"
)
@
staticmethod
def
fuse_columns
(
row
):
"""Joins text from columns in a pandas DataFrame row with underscores.
Intended to be used with *DataFrame.apply*."""
return
"_"
.
join
(
map
(
str
,
row
.
values
))
def
__init__
(
self
,
x_input_file
,
y_input_file
,
x_column
,
y_column
,
labels
,
extra_cols
=
None
):
if
extra_cols
is
None
:
x_usecols
=
[
"gene"
,
x_column
].
__contains__
y_usecols
=
[
"gene"
,
y_column
].
__contains__
else
:
x_usecols
=
[
"gene"
,
x_column
,
*
extra_cols
].
__contains__
y_usecols
=
[
"gene"
,
y_column
,
*
extra_cols
].
__contains__
x_data
=
pd
.
read_table
(
x_input_file
,
index_col
=
"gene"
,
usecols
=
x_usecols
).
rename
(
columns
=
{
x_column
:
"x"
})
y_data
=
pd
.
read_table
(
y_input_file
,
index_col
=
"gene"
,
usecols
=
y_usecols
).
rename
(
columns
=
{
y_column
:
"y"
})
# Just some experiments
# from cytoolz import merge_with
# from cytoolz.curried import merge_with as cmerge
# common = x_data.index.intersection(y_data.index)
# as_dicts = merge_with(
# cmerge(set),
# x_data.loc[common].to_dict(orient="index"),
# y_data.loc[common].to_dict(orient="index"))
# data = pd.DataFrame(as_dicts).T
self
.
data
=
pd
.
merge
(
x_data
,
y_data
,
left_index
=
True
,
right_index
=
True
,
validate
=
"one_to_one"
)
if
extra_cols
is
not
None
:
extra_cols
=
list
(
concat
((
[
colname
]
if
colname
in
self
.
data
.
columns
else
[
f
"
{
colname
}
_x"
,
f
"
{
colname
}
_y"
]
for
colname
in
extra_cols
)))
# if only one element in extra_cols, it will be overridden:
# "_".join(extra_cols) == extra_cols[0]
if
len
(
extra_cols
)
>
1
:
self
.
data
=
self
.
data
.
assign
(
**
{
"_"
.
join
(
extra_cols
):
self
.
data
[
extra_cols
].
apply
(
Scatterplot
.
fuse_columns
,
axis
=
1
)})
self
.
grouping_col
=
"_"
.
join
(
extra_cols
)
else
:
self
.
grouping_col
=
None
(
self
.
x_label
,
self
.
y_label
)
=
labels
def
apply_selector
(
self
,
selector
,
chose_from
=
None
):
"""Returns a list of gene ids based on a *selector* string.
The *selector* string will be used as a query string on *self.data*.
If *chose_from* is not empty, gene ids will only been selected
if they belong to *chose_from*."""
if
chose_from
is
None
:
chose_from
=
{}
else
:
chose_from
=
set
(
chose_from
)
if
chose_from
:
return
[
gene_id
for
gene_id
in
self
.
data
.
query
(
selector
).
index
if
gene_id
in
chose_from
]
return
[
gene_id
for
gene_id
in
self
.
data
.
query
(
selector
).
index
]
def
plot_maker
(
self
,
grouping
=
None
,
group2colour
=
None
,
**
kwargs
):
"""Builds a plotting function that can colour dots based on them
belonging to a group defined by *grouping*."""
def
plot_lfclfc_scatter
():
"""Generates the scatterplot, returns its legend so that
*save_plot* can include it in the bounding box."""
# fig, axis = plot_scatter(
# print(kwargs["x_range"])
try
:
axis
=
plot_scatter
(
self
.
data
,
"x"
,
"y"
,
grouping
=
grouping
,
group2colour
=
group2colour
,
**
kwargs
)
except
ValueError
as
err
:
if
str
(
err
)
==
"No data to plot."
:
warnings
.
warn
(
"No data to plot."
)
return
None
else
:
raise
# Lines indicating 2-fold threshold.
# Assumes the data are in log2 fold changes
line_style
=
{
"linewidth"
:
0.5
,
"color"
:
"0.5"
,
"linestyle"
:
"dashed"
}
if
"x_range"
in
kwargs
:
(
x_annot_loc
,
_
)
=
kwargs
[
"x_range"
]
else
:
x_annot_loc
=
min
(
self
.
data
.
x
)
if
"y_range"
in
kwargs
:
(
y_annot_loc
,
_
)
=
kwargs
[
"y_range"
]
else
:
y_annot_loc
=
min
(
self
.
data
.
y
)
axis
.
axhline
(
y
=
1
,
**
line_style
)
axis
.
annotate
(
f
"y = 1"
,
xy
=
(
x_annot_loc
,
1
),
xycoords
=
"data"
,
horizontalalignment
=
'left'
,
verticalalignment
=
'bottom'
,
size
=
"x-small"
,
color
=
line_style
[
"color"
])
axis
.
axhline
(
y
=-
1
,
**
line_style
)
axis
.
annotate
(
f
"y = -1"
,
xy
=
(
x_annot_loc
,
-
1
),
xycoords
=
"data"
,
horizontalalignment
=
'left'
,
verticalalignment
=
'top'
,
size
=
"x-small"
,
color
=
line_style
[
"color"
])
axis
.
axvline
(
x
=
1
,
**
line_style
)
axis
.
annotate
(
f
"x = -1"
,
xy
=
(
-
1
,
y_annot_loc
),
xycoords
=
"data"
,
horizontalalignment
=
'right'
,
verticalalignment
=
'bottom'
,
rotation
=
90
,
size
=
"x-small"
,
color
=
line_style
[
"color"
])
axis
.
axvline
(
x
=-
1
,
**
line_style
)
axis
.
annotate
(
f
"x = 1"
,
xy
=
(
1
,
y_annot_loc
),
xycoords
=
"data"
,
horizontalalignment
=
'left'
,
verticalalignment
=
'bottom'
,
rotation
=
90
,
size
=
"x-small"
,
color
=
line_style
[
"color"
])
# Number of genes beyond lfc thresholds, in each quadrant
# up_up = 100 * len(self.data.query(
# f"x > 1 & y > 1")) / len(self.data)
# up_down = 100 * len(self.data.query(
# f"x > 1 & y < 1")) / len(self.data)
# down_up = 100 * len(self.data.query(
# f"x < 1 & y > 1")) / len(self.data)
# down_down = 100 * len(self.data.query(
# f"x < 1 & y < 1")) / len(self.data)