diff --git a/plot_scatterplot.py b/plot_scatterplot.py index 9478e636f954941854e9c404d9519fdca390eae9..b4d43781e4bd1fc52d57cf24b85a147107ff3d81 100755 --- a/plot_scatterplot.py +++ b/plot_scatterplot.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # vim: set fileencoding=<utf-8> : -"""This script reads data from "tidy" files and makes plots out of -it, at the same scale. +"""This script reads data from "tidy" files and makes a scatter plot out of it. It also outputs a table containing the plotted data points.""" import argparse @@ -91,12 +90,19 @@ class Scatterplot: y_column, labels, extra_cols=None): + # usecols can be a callable to filter column names: + # If callable, the callable function will be evaluated against the + # column names, returning names where the callable function evaluates + # to True. if extra_cols is None: x_usecols = ["gene", x_column].__contains__ y_usecols = ["gene", y_column].__contains__ else: x_usecols = ["gene", x_column, *extra_cols].__contains__ y_usecols = ["gene", y_column, *extra_cols].__contains__ + # The columns containing the data to plot might have the same name + # in the two tables. + # We rename them to x and y for simplicity. x_data = pd.read_csv( x_input_file, sep="\t", index_col="gene", usecols=x_usecols).rename( columns={x_column: "x"}) @@ -115,6 +121,7 @@ class Scatterplot: self.data = pd.merge( x_data, y_data, left_index=True, right_index=True, validate="one_to_one") + # Compute a classifier column (to be used to colour points) if extra_cols is not None: extra_cols = list(concat(( [colname] if colname in self.data.columns @@ -148,7 +155,7 @@ class Scatterplot: def plot_maker(self, grouping=None, group2colour=None, **kwargs): """Builds a plotting function that can colour dots based on them belonging to a group defined by *grouping*.""" - def plot_lfclfc_scatter(): + def plotting_function(): """Generates the scatterplot, returns its legend so that *save_plot* can include it in the bounding box.""" # fig, axis = plot_scatter( @@ -297,9 +304,11 @@ class Scatterplot: # TODO: force ticks to be integers # Return a tuple of "extra artists", # to correctly define the bounding box - return plot_lfclfc_scatter + return plotting_function - def save_plot(self, outfile, grouping=None, group2colour=None, **kwargs): + def save_plot(self, outfile, + grouping=None, group2colour=None, + **kwargs): """Creates the plotting function and transmits it for execution to the function that really does the saving.""" if grouping is None and self.grouping_col is not None: