From 2e3d6ddb29294d4c89fef2818b5749577373bb98 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 13:05:44 -0800 Subject: [PATCH 1/5] REF: less state in scatterplot --- pandas/plotting/_matplotlib/core.py | 43 +++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index d59220a1f97f8..081d311263682 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -22,6 +22,7 @@ import matplotlib as mpl import numpy as np +from pandas._libs import lib from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -1269,14 +1270,30 @@ class ScatterPlot(PlanePlot): def _kind(self) -> Literal["scatter"]: return "scatter" - def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: + def __init__( + self, + data, + x, + y, + s=None, + c=None, + *, + colorbar: bool | lib.NoDefault = lib.no_default, + norm=None, + **kwargs, + ) -> None: if s is None: # hide the matplotlib default for size, in case we want to change # the handling of this argument later s = 20 elif is_hashable(s) and s in data.columns: s = data[s] - super().__init__(data, x, y, s=s, **kwargs) + self.s = s + + self.colorbar = colorbar + self.norm = norm + + super().__init__(data, x, y, **kwargs) if is_integer(c) and not self.data.columns._holds_integer(): c = self.data.columns[c] self.c = c @@ -1323,13 +1340,12 @@ def _make_plot(self, fig: Figure): cmap = colors.ListedColormap([cmap(i) for i in range(cmap.N)]) bounds = np.linspace(0, n_cats, n_cats + 1) norm = colors.BoundaryNorm(bounds, cmap.N) + assert "norm" not in self.kwds else: - norm = self.kwds.pop("norm", None) - # plot colorbar if - # 1. colormap is assigned, and - # 2.`c` is a column containing only numeric values - plot_colorbar = self.colormap or c_is_column - cb = self.kwds.pop("colorbar", is_numeric_dtype(c_values) and plot_colorbar) + # TODO: warn if norm is passed and we are silently ignoring it? + norm = self.norm + + cb = self._get_colorbar(c_values, c_is_column) if self.legend and hasattr(self, "label"): label = self.label @@ -1342,6 +1358,7 @@ def _make_plot(self, fig: Figure): label=label, cmap=cmap, norm=norm, + s=self.s, **self.kwds, ) if cb: @@ -1363,6 +1380,16 @@ def _make_plot(self, fig: Figure): err_kwds["ecolor"] = scatter.get_facecolor()[0] ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + def _get_colorbar(self, c_values, c_is_column: bool) -> bool: + # plot colorbar if + # 1. colormap is assigned, and + # 2.`c` is a column containing only numeric values + plot_colorbar = self.colormap or c_is_column + cb = self.colorbar + if cb is lib.no_default: + return is_numeric_dtype(c_values) and plot_colorbar + return cb + class HexBinPlot(PlanePlot): @property From 877367ab3d7edae4c73c74e49784f2fbd60d4e72 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 15:32:45 -0800 Subject: [PATCH 2/5] REF: helper --- pandas/plotting/_matplotlib/core.py | 51 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 081d311263682..a07e8393d3e9e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1322,29 +1322,7 @@ def _make_plot(self, fig: Figure): else: c_values = c - if self.colormap is not None: - cmap = mpl.colormaps.get_cmap(self.colormap) - # cmap is only used if c_values are integers, otherwise UserWarning. - # GH-53908: additionally call isinstance() because is_integer_dtype - # returns True for "b" (meaning "blue" and not int8 in this context) - elif not isinstance(c_values, str) and is_integer_dtype(c_values): - # pandas uses colormap, matplotlib uses cmap. - cmap = mpl.colormaps["Greys"] - else: - cmap = None - - if color_by_categorical: - from matplotlib import colors - - n_cats = len(self.data[c].cat.categories) - cmap = colors.ListedColormap([cmap(i) for i in range(cmap.N)]) - bounds = np.linspace(0, n_cats, n_cats + 1) - norm = colors.BoundaryNorm(bounds, cmap.N) - assert "norm" not in self.kwds - else: - # TODO: warn if norm is passed and we are silently ignoring it? - norm = self.norm - + norm, cmap = self._get_norm_and_cmap(c_values, color_by_categorical) cb = self._get_colorbar(c_values, c_is_column) if self.legend and hasattr(self, "label"): @@ -1365,6 +1343,7 @@ def _make_plot(self, fig: Figure): cbar_label = c if c_is_column else "" cbar = self._plot_colorbar(ax, fig=fig, label=cbar_label) if color_by_categorical: + n_cats = len(self.data[c].cat.categories) cbar.set_ticks(np.linspace(0.5, n_cats - 0.5, n_cats)) cbar.ax.set_yticklabels(self.data[c].cat.categories) @@ -1380,6 +1359,32 @@ def _make_plot(self, fig: Figure): err_kwds["ecolor"] = scatter.get_facecolor()[0] ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + def _get_norm_and_cmap(self, c_values, color_by_categorical: bool): + c = self.c + if self.colormap is not None: + cmap = mpl.colormaps.get_cmap(self.colormap) + # cmap is only used if c_values are integers, otherwise UserWarning. + # GH-53908: additionally call isinstance() because is_integer_dtype + # returns True for "b" (meaning "blue" and not int8 in this context) + elif not isinstance(c_values, str) and is_integer_dtype(c_values): + # pandas uses colormap, matplotlib uses cmap. + cmap = mpl.colormaps["Greys"] + else: + cmap = None + + if color_by_categorical: + from matplotlib import colors + + n_cats = len(self.data[c].cat.categories) + cmap = colors.ListedColormap([cmap(i) for i in range(cmap.N)]) + bounds = np.linspace(0, n_cats, n_cats + 1) + norm = colors.BoundaryNorm(bounds, cmap.N) + # TODO: warn that we are ignoring self.norm if user specified it? + # Doesn't happen in any tests 2023-11-09 + else: + norm = self.norm + return norm, cmap + def _get_colorbar(self, c_values, c_is_column: bool) -> bool: # plot colorbar if # 1. colormap is assigned, and From e19f9da2d2bb2b272cb09aeced2ac308930da492 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 15:37:07 -0800 Subject: [PATCH 3/5] REF: helper --- pandas/plotting/_matplotlib/core.py | 30 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a07e8393d3e9e..e20b436e8fdd8 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1309,19 +1309,7 @@ def _make_plot(self, fig: Figure): ) color = self.kwds.pop("color", None) - if c is not None and color is not None: - raise TypeError("Specify exactly one of `c` and `color`") - if c is None and color is None: - c_values = self.plt.rcParams["patch.facecolor"] - elif color is not None: - c_values = color - elif color_by_categorical: - c_values = self.data[c].cat.codes - elif c_is_column: - c_values = self.data[c].values - else: - c_values = c - + c_values = self._get_c_values(color, color_by_categorical, c_is_column) norm, cmap = self._get_norm_and_cmap(c_values, color_by_categorical) cb = self._get_colorbar(c_values, c_is_column) @@ -1359,6 +1347,22 @@ def _make_plot(self, fig: Figure): err_kwds["ecolor"] = scatter.get_facecolor()[0] ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): + c = self.c + if c is not None and color is not None: + raise TypeError("Specify exactly one of `c` and `color`") + if c is None and color is None: + c_values = self.plt.rcParams["patch.facecolor"] + elif color is not None: + c_values = color + elif color_by_categorical: + c_values = self.data[c].cat.codes + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + return c_values + def _get_norm_and_cmap(self, c_values, color_by_categorical: bool): c = self.c if self.colormap is not None: From 2dd0f7ff91ecfaf6d2deb8116e000e6d9f783b7e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 15:48:48 -0800 Subject: [PATCH 4/5] less state --- pandas/plotting/_matplotlib/core.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index e20b436e8fdd8..5d6a85fe36c0f 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1222,13 +1222,6 @@ def __init__(self, data, x, y, **kwargs) -> None: if is_integer(y) and not self.data.columns._holds_integer(): y = self.data.columns[y] - # Scatter plot allows to plot objects data - if self._kind == "hexbin": - if len(self.data[x]._get_numeric_data()) == 0: - raise ValueError(self._kind + " requires x column to be numeric") - if len(self.data[y]._get_numeric_data()) == 0: - raise ValueError(self._kind + " requires y column to be numeric") - self.x = x self.y = y @@ -1405,19 +1398,27 @@ class HexBinPlot(PlanePlot): def _kind(self) -> Literal["hexbin"]: return "hexbin" - def __init__(self, data, x, y, C=None, **kwargs) -> None: + def __init__(self, data, x, y, C=None, *, colorbar: bool = True, **kwargs) -> None: super().__init__(data, x, y, **kwargs) if is_integer(C) and not self.data.columns._holds_integer(): C = self.data.columns[C] self.C = C + self.colorbar = colorbar + + # Scatter plot allows to plot objects data + if len(self.data[self.x]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires x column to be numeric") + if len(self.data[self.y]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires y column to be numeric") + def _make_plot(self, fig: Figure) -> None: x, y, data, C = self.x, self.y, self.data, self.C ax = self.axes[0] # pandas uses colormap, matplotlib uses cmap. cmap = self.colormap or "BuGn" cmap = mpl.colormaps.get_cmap(cmap) - cb = self.kwds.pop("colorbar", True) + cb = self.colorbar if C is None: c_values = None From b54a46a6df8742c6614444e9b7d09a571ff074e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 16:52:21 -0800 Subject: [PATCH 5/5] REF: less state --- pandas/plotting/_matplotlib/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 5d6a85fe36c0f..31b61906cac53 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1306,7 +1306,7 @@ def _make_plot(self, fig: Figure): norm, cmap = self._get_norm_and_cmap(c_values, color_by_categorical) cb = self._get_colorbar(c_values, c_is_column) - if self.legend and hasattr(self, "label"): + if self.legend: label = self.label else: label = None @@ -1330,8 +1330,6 @@ def _make_plot(self, fig: Figure): if label is not None: self._append_legend_handles_labels(scatter, label) - else: - self.legend = False errors_x = self._get_errorbars(label=x, index=0, yerr=False) errors_y = self._get_errorbars(label=y, index=0, xerr=False)