diff --git a/metaprof_utils/__init__.py b/metaprof_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a00bb4c12558e6e9ccc03906a73edbe35b78022a --- /dev/null +++ b/metaprof_utils/__init__.py @@ -0,0 +1,5 @@ +__copyright__ = "Copyright (C) 2020-2021 Blaise Li" +__licence__ = "GNU GPLv3" +from .metaprof_utils import ( + DEFAULT_PARAMETERS, + compute_matrix) diff --git a/metaprof_utils/metaprof_utils.py b/metaprof_utils/metaprof_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5f1f9d336be144a0186d7c6acb04b01681b9adf8 --- /dev/null +++ b/metaprof_utils/metaprof_utils.py @@ -0,0 +1,99 @@ +# Copyright (C) 2020-2021 Blaise Li +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +""" +Module containing metaprofile creation utilities based on deeptools. +""" +from copy import deepcopy +from deeptools import heatmapper +from deeptools.plotProfile import Profile +from cytoolz import keyfilter + + +DEFAULT_PARAMETERS = { + "upstream": 0, + "downstream": 0, + # TODO: change to 2000 ? + "body": 500, + "bin size": 10, + "ref point": None, + "verbose": False, + "bin avg type": "mean", + "missing data as zero": False, + "min threshold": None, + "max threshold": None, + "scale": 1, + "skip zeros": False, + "nan after end": False, + "proc number": 4, + "sort regions": "keep", + "sort using": "mean", + "unscaled 5 prime": 0, + "unscaled 3 prime": 0, + "start_label": "start", + "end_label": "end", + "label_rotation": 90, +} + + +def is_prof_param(key): + """Determine if *key* corresponds to a valid parameter for a *Profile*.""" + return key in { + "plot_title", "y_axis_label", "y_min", "y_max", "averagetype", + "reference_point_label", "start_label", "end_label", + "plot_height", "plot_width", "per_group", + "plot_type", "image_format", "color_list", + "legend_location", "plots_per_row", "label_rotation", "dpi"} + + +def compute_matrix(bigwig_filenames, + bed_filename, + plot_filename=None, + **extra_parameters): + """Combine information from bigwig files *bigwig_filenames* and bed file + *bed_filename*. + + If *plot_filename* is set, write the corresponding meta profile + in this file. + """ + parameters = deepcopy(DEFAULT_PARAMETERS) + parameters.update(extra_parameters) + heatm = heatmapper.heatmapper() + heatm.computeMatrix(bigwig_filenames, bed_filename, parameters) + if "sample_labels" in parameters: + heatm.matrix.set_sample_labels(parameters["sample_labels"]) + if "group_labels" in parameters: + heatm.matrix.set_group_labels(parameters["group_labels"]) + # Fixing parameters (as in heatmapper.read_matrix_file + # and heatmapper.save_matrix) + nb_samples = len(heatm.matrix.sample_labels) + hm_params = dict() + for (key, val) in heatm.parameters.items(): + if isinstance(val, list) and not val: + val = None + if key in heatm.special_params and not isinstance(val, list): + val = [val] * nb_samples + if not val: + val = [None] * nb_samples + hm_params[key] = val + heatm.parameters = hm_params + + if plot_filename is not None: + print(f"plotting profile to {plot_filename}") + prof_params = keyfilter(is_prof_param, parameters) + prof_params["per_group"] = True + prof = Profile( + heatm, plot_filename, + **prof_params) + prof.plot_profile() diff --git a/scripts/create_metagene_profile.py b/scripts/create_metagene_profile.py index 3b185a4c5e0ed53d1e1ea7bd84e1a73d201c2a39..6e95ce41d61690b3f449e0e6d15faec0c56084ea 100755 --- a/scripts/create_metagene_profile.py +++ b/scripts/create_metagene_profile.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (C) 2020 Blaise Li +# Copyright (C) 2020-2021 Blaise Li # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -70,7 +70,7 @@ import argparse import os import sys import warnings -from copy import deepcopy +# from copy import deepcopy from pathlib import Path from shutil import copyfile from tempfile import NamedTemporaryFile @@ -80,9 +80,14 @@ from sqlite3 import OperationalError from yaml import load as yload # https://pythonhosted.org/gffutils/ from gffutils import FeatureDB, create_db -from deeptools import heatmapper -from deeptools.plotProfile import Profile -from cytoolz import keyfilter, valmap +# from deeptools import heatmapper +# from deeptools.plotProfile import Profile +# from cytoolz import keyfilter, valmap +from cytoolz import valmap + +from metaprof_utils import ( + DEFAULT_PARAMETERS, + compute_matrix) def formatwarning(message, category, filename, lineno, line): @@ -206,83 +211,83 @@ def fix_none(param_value): return param_value -DEFAULT_PARAMETERS = { - "upstream": 0, - "downstream": 0, - # TODO: change to 2000 ? - "body": 500, - "bin size": 10, - "ref point": None, - "verbose": False, - "bin avg type": "mean", - "missing data as zero": False, - "min threshold": None, - "max threshold": None, - "scale": 1, - "skip zeros": False, - "nan after end": False, - "proc number": 4, - "sort regions": "keep", - "sort using": "mean", - "unscaled 5 prime": 0, - "unscaled 3 prime": 0, - "start_label": "start", - "end_label": "end", - "label_rotation": 90, -} +# DEFAULT_PARAMETERS = { +# "upstream": 0, +# "downstream": 0, +# # TODO: change to 2000 ? +# "body": 500, +# "bin size": 10, +# "ref point": None, +# "verbose": False, +# "bin avg type": "mean", +# "missing data as zero": False, +# "min threshold": None, +# "max threshold": None, +# "scale": 1, +# "skip zeros": False, +# "nan after end": False, +# "proc number": 4, +# "sort regions": "keep", +# "sort using": "mean", +# "unscaled 5 prime": 0, +# "unscaled 3 prime": 0, +# "start_label": "start", +# "end_label": "end", +# "label_rotation": 90, +# } PARAMETER_INFO = "\n".join(DEFAULT_PARAMETERS.keys()) -def is_prof_param(key): - """Determine if *key* corresponds to a valid parameter for a *Profile*.""" - return key in { - "plot_title", "y_axis_label", "y_min", "y_max", "averagetype", - "reference_point_label", "start_label", "end_label", - "plot_height", "plot_width", "per_group", - "plot_type", "image_format", "color_list", - "legend_location", "plots_per_row", "label_rotation", "dpi"} - - -def compute_matrix(bigwig_filenames, - bed_filename, - plot_filename=None, - **extra_parameters): - """Combine information from bigwig files *bigwig_filenames* and bed file - *bed_filename*. - - If *plot_filename* is set, write the corresponding meta profile - in this file. - """ - parameters = deepcopy(DEFAULT_PARAMETERS) - parameters.update(extra_parameters) - heatm = heatmapper.heatmapper() - heatm.computeMatrix(bigwig_filenames, bed_filename, parameters) - if "sample_labels" in parameters: - heatm.matrix.set_sample_labels(parameters["sample_labels"]) - if "group_labels" in parameters: - heatm.matrix.set_group_labels(parameters["group_labels"]) - # Fixing parameters (as in heatmapper.read_matrix_file - # and heatmapper.save_matrix) - nb_samples = len(heatm.matrix.sample_labels) - hm_params = dict() - for (key, val) in heatm.parameters.items(): - if isinstance(val, list) and not val: - val = None - if key in heatm.special_params and not isinstance(val, list): - val = [val] * nb_samples - if not val: - val = [None] * nb_samples - hm_params[key] = val - heatm.parameters = hm_params - - if plot_filename is not None: - print(f"plotting profile to {plot_filename}") - prof_params = keyfilter(is_prof_param, parameters) - prof_params["per_group"] = True - prof = Profile( - heatm, plot_filename, - **prof_params) - prof.plot_profile() +# def is_prof_param(key): +# """Determine if *key* corresponds to a valid parameter for a *Profile*.""" +# return key in { +# "plot_title", "y_axis_label", "y_min", "y_max", "averagetype", +# "reference_point_label", "start_label", "end_label", +# "plot_height", "plot_width", "per_group", +# "plot_type", "image_format", "color_list", +# "legend_location", "plots_per_row", "label_rotation", "dpi"} + + +# def compute_matrix(bigwig_filenames, +# bed_filename, +# plot_filename=None, +# **extra_parameters): +# """Combine information from bigwig files *bigwig_filenames* and bed file +# *bed_filename*. +# +# If *plot_filename* is set, write the corresponding meta profile +# in this file. +# """ +# parameters = deepcopy(DEFAULT_PARAMETERS) +# parameters.update(extra_parameters) +# heatm = heatmapper.heatmapper() +# heatm.computeMatrix(bigwig_filenames, bed_filename, parameters) +# if "sample_labels" in parameters: +# heatm.matrix.set_sample_labels(parameters["sample_labels"]) +# if "group_labels" in parameters: +# heatm.matrix.set_group_labels(parameters["group_labels"]) +# # Fixing parameters (as in heatmapper.read_matrix_file +# # and heatmapper.save_matrix) +# nb_samples = len(heatm.matrix.sample_labels) +# hm_params = dict() +# for (key, val) in heatm.parameters.items(): +# if isinstance(val, list) and not val: +# val = None +# if key in heatm.special_params and not isinstance(val, list): +# val = [val] * nb_samples +# if not val: +# val = [None] * nb_samples +# hm_params[key] = val +# heatm.parameters = hm_params +# +# if plot_filename is not None: +# print(f"plotting profile to {plot_filename}") +# prof_params = keyfilter(is_prof_param, parameters) +# prof_params["per_group"] = True +# prof = Profile( +# heatm, plot_filename, +# **prof_params) +# prof.plot_profile() # def get_transcript_structure(fdb, transcript): diff --git a/setup.py b/setup.py index 4e2acc3fb2300ff965dfe56819123498e8825fae..2df66a2058764b23e4506c7e3aa1cec2c5aa4503 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ from setuptools import setup, find_packages name = "plotting_scripts" -__version__ = "0.1" +__version__ = "0.2" setup(