Commit a539c772 authored by Blaise Li
Browse files

Metaprofile utilities in library metaprof_utils.

parent c79727b3
__copyright__ = "Copyright (C) 2020-2021 Blaise Li"
__licence__ = "GNU GPLv3"
from .metaprof_utils import (
DEFAULT_PARAMETERS,
compute_matrix)
# Copyright (C) 2020-2021 Blaise Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Module containing metaprofile creation utilities based on deeptools.
"""
from copy import deepcopy
from deeptools import heatmapper
from deeptools.plotProfile import Profile
from cytoolz import keyfilter
# Baseline settings used by compute_matrix: a copy of this mapping is
# updated with the caller's keyword arguments, handed as a whole to
# heatmapper.computeMatrix, and filtered with is_prof_param to build the
# keyword arguments of Profile.
DEFAULT_PARAMETERS = {
    # Space-separated keys: presumably the option names expected by
    # deeptools' heatmapper (to be confirmed against the deeptools version
    # in use).
    "upstream": 0,
    "downstream": 0,
    # TODO: change to 2000 ?
    "body": 500,
    "bin size": 10,
    "ref point": None,
    "verbose": False,
    "bin avg type": "mean",
    "missing data as zero": False,
    "min threshold": None,
    "max threshold": None,
    "scale": 1,
    "skip zeros": False,
    "nan after end": False,
    "proc number": 4,
    "sort regions": "keep",
    "sort using": "mean",
    "unscaled 5 prime": 0,
    "unscaled 3 prime": 0,
    # Underscore-separated keys: these are among the names accepted by
    # is_prof_param, so they end up as Profile keyword arguments.
    "start_label": "start",
    "end_label": "end",
    "label_rotation": 90,
}
def is_prof_param(key):
    """Tell whether *key* is one of the parameter names valid for a *Profile*.

    Returns True when *key* belongs to the fixed set of names listed below,
    False otherwise. It is used as the predicate that selects, among all
    the parameters of compute_matrix, those forwarded to *Profile*.
    """
    profile_keywords = frozenset((
        # Titles, axis and labels
        "plot_title", "y_axis_label", "y_min", "y_max", "averagetype",
        "reference_point_label", "start_label", "end_label",
        # Figure geometry and grouping
        "plot_height", "plot_width", "per_group",
        # Rendering options
        "plot_type", "image_format", "color_list",
        "legend_location", "plots_per_row", "label_rotation", "dpi",
    ))
    return key in profile_keywords
def compute_matrix(bigwig_filenames,
                   bed_filename,
                   plot_filename=None,
                   **extra_parameters):
    """Compute a deeptools matrix from bigwig files over the regions of a bed.

    A copy of *DEFAULT_PARAMETERS*, updated with *extra_parameters*, is
    passed to a fresh ``heatmapper`` together with *bigwig_filenames* and
    *bed_filename*. The optional "sample_labels" and "group_labels"
    entries of the parameters are applied to the resulting matrix.

    If *plot_filename* is not None, the meta profile is plotted to that
    file, using the parameters accepted by *is_prof_param* and with
    "per_group" forced to True.
    """
    settings = deepcopy(DEFAULT_PARAMETERS)
    settings.update(extra_parameters)

    mapper = heatmapper.heatmapper()
    mapper.computeMatrix(bigwig_filenames, bed_filename, settings)
    # Optional relabelling of the matrix.
    if "sample_labels" in settings:
        mapper.matrix.set_sample_labels(settings["sample_labels"])
    if "group_labels" in settings:
        mapper.matrix.set_group_labels(settings["group_labels"])

    # Fixing parameters (as in heatmapper.read_matrix_file
    # and heatmapper.save_matrix):
    # an empty list becomes None, and for the keys in special_params
    # a non-list value is repeated once per sample.
    sample_count = len(mapper.matrix.sample_labels)
    fixed_params = {}
    for (name, value) in mapper.parameters.items():
        if isinstance(value, list) and not value:
            value = None
        if name in mapper.special_params and not isinstance(value, list):
            value = [value] * sample_count
            if not value:
                value = [None] * sample_count
        fixed_params[name] = value
    mapper.parameters = fixed_params

    if plot_filename is None:
        # No plot requested: nothing more to do.
        return
    print(f"plotting profile to {plot_filename}")
    profile_kwargs = keyfilter(is_prof_param, settings)
    profile_kwargs["per_group"] = True
    profile = Profile(mapper, plot_filename, **profile_kwargs)
    profile.plot_profile()
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright (C) 2020 Blaise Li # Copyright (C) 2020-2021 Blaise Li
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
...@@ -70,7 +70,7 @@ import argparse ...@@ -70,7 +70,7 @@ import argparse
import os import os
import sys import sys
import warnings import warnings
from copy import deepcopy # from copy import deepcopy
from pathlib import Path from pathlib import Path
from shutil import copyfile from shutil import copyfile
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
...@@ -80,9 +80,14 @@ from sqlite3 import OperationalError ...@@ -80,9 +80,14 @@ from sqlite3 import OperationalError
from yaml import load as yload from yaml import load as yload
# https://pythonhosted.org/gffutils/ # https://pythonhosted.org/gffutils/
from gffutils import FeatureDB, create_db from gffutils import FeatureDB, create_db
from deeptools import heatmapper # from deeptools import heatmapper
from deeptools.plotProfile import Profile # from deeptools.plotProfile import Profile
from cytoolz import keyfilter, valmap # from cytoolz import keyfilter, valmap
from cytoolz import valmap
from metaprof_utils import (
DEFAULT_PARAMETERS,
compute_matrix)
def formatwarning(message, category, filename, lineno, line): def formatwarning(message, category, filename, lineno, line):
...@@ -206,83 +211,83 @@ def fix_none(param_value): ...@@ -206,83 +211,83 @@ def fix_none(param_value):
return param_value return param_value
DEFAULT_PARAMETERS = { # DEFAULT_PARAMETERS = {
"upstream": 0, # "upstream": 0,
"downstream": 0, # "downstream": 0,
# TODO: change to 2000 ? # # TODO: change to 2000 ?
"body": 500, # "body": 500,
"bin size": 10, # "bin size": 10,
"ref point": None, # "ref point": None,
"verbose": False, # "verbose": False,
"bin avg type": "mean", # "bin avg type": "mean",
"missing data as zero": False, # "missing data as zero": False,
"min threshold": None, # "min threshold": None,
"max threshold": None, # "max threshold": None,
"scale": 1, # "scale": 1,
"skip zeros": False, # "skip zeros": False,
"nan after end": False, # "nan after end": False,
"proc number": 4, # "proc number": 4,
"sort regions": "keep", # "sort regions": "keep",
"sort using": "mean", # "sort using": "mean",
"unscaled 5 prime": 0, # "unscaled 5 prime": 0,
"unscaled 3 prime": 0, # "unscaled 3 prime": 0,
"start_label": "start", # "start_label": "start",
"end_label": "end", # "end_label": "end",
"label_rotation": 90, # "label_rotation": 90,
} # }
PARAMETER_INFO = "\n".join(DEFAULT_PARAMETERS.keys()) PARAMETER_INFO = "\n".join(DEFAULT_PARAMETERS.keys())
def is_prof_param(key): # def is_prof_param(key):
"""Determine if *key* corresponds to a valid parameter for a *Profile*.""" # """Determine if *key* corresponds to a valid parameter for a *Profile*."""
return key in { # return key in {
"plot_title", "y_axis_label", "y_min", "y_max", "averagetype", # "plot_title", "y_axis_label", "y_min", "y_max", "averagetype",
"reference_point_label", "start_label", "end_label", # "reference_point_label", "start_label", "end_label",
"plot_height", "plot_width", "per_group", # "plot_height", "plot_width", "per_group",
"plot_type", "image_format", "color_list", # "plot_type", "image_format", "color_list",
"legend_location", "plots_per_row", "label_rotation", "dpi"} # "legend_location", "plots_per_row", "label_rotation", "dpi"}
def compute_matrix(bigwig_filenames, # def compute_matrix(bigwig_filenames,
bed_filename, # bed_filename,
plot_filename=None, # plot_filename=None,
**extra_parameters): # **extra_parameters):
"""Combine information from bigwig files *bigwig_filenames* and bed file # """Combine information from bigwig files *bigwig_filenames* and bed file
*bed_filename*. # *bed_filename*.
#
If *plot_filename* is set, write the corresponding meta profile # If *plot_filename* is set, write the corresponding meta profile
in this file. # in this file.
""" # """
parameters = deepcopy(DEFAULT_PARAMETERS) # parameters = deepcopy(DEFAULT_PARAMETERS)
parameters.update(extra_parameters) # parameters.update(extra_parameters)
heatm = heatmapper.heatmapper() # heatm = heatmapper.heatmapper()
heatm.computeMatrix(bigwig_filenames, bed_filename, parameters) # heatm.computeMatrix(bigwig_filenames, bed_filename, parameters)
if "sample_labels" in parameters: # if "sample_labels" in parameters:
heatm.matrix.set_sample_labels(parameters["sample_labels"]) # heatm.matrix.set_sample_labels(parameters["sample_labels"])
if "group_labels" in parameters: # if "group_labels" in parameters:
heatm.matrix.set_group_labels(parameters["group_labels"]) # heatm.matrix.set_group_labels(parameters["group_labels"])
# Fixing parameters (as in heatmapper.read_matrix_file # # Fixing parameters (as in heatmapper.read_matrix_file
# and heatmapper.save_matrix) # # and heatmapper.save_matrix)
nb_samples = len(heatm.matrix.sample_labels) # nb_samples = len(heatm.matrix.sample_labels)
hm_params = dict() # hm_params = dict()
for (key, val) in heatm.parameters.items(): # for (key, val) in heatm.parameters.items():
if isinstance(val, list) and not val: # if isinstance(val, list) and not val:
val = None # val = None
if key in heatm.special_params and not isinstance(val, list): # if key in heatm.special_params and not isinstance(val, list):
val = [val] * nb_samples # val = [val] * nb_samples
if not val: # if not val:
val = [None] * nb_samples # val = [None] * nb_samples
hm_params[key] = val # hm_params[key] = val
heatm.parameters = hm_params # heatm.parameters = hm_params
#
if plot_filename is not None: # if plot_filename is not None:
print(f"plotting profile to {plot_filename}") # print(f"plotting profile to {plot_filename}")
prof_params = keyfilter(is_prof_param, parameters) # prof_params = keyfilter(is_prof_param, parameters)
prof_params["per_group"] = True # prof_params["per_group"] = True
prof = Profile( # prof = Profile(
heatm, plot_filename, # heatm, plot_filename,
**prof_params) # **prof_params)
prof.plot_profile() # prof.plot_profile()
# def get_transcript_structure(fdb, transcript): # def get_transcript_structure(fdb, transcript):
......
...@@ -17,7 +17,7 @@ from setuptools import setup, find_packages ...@@ -17,7 +17,7 @@ from setuptools import setup, find_packages
name = "plotting_scripts" name = "plotting_scripts"
__version__ = "0.1" __version__ = "0.2"
setup( setup(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment