diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 1058a270a76ba..57cd67675893b 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -140,9 +140,10 @@ "metadata": {}, "outputs": [], "source": [ - "s = df.style.set_table_attributes('class=\"table-cls\"')\n", - "cls = pd.DataFrame(data=[['cls1', None], ['cls3', 'cls2 cls3']], index=[0,2], columns=['A', 'C'])\n", - "s.set_td_classes(cls)" + "css_classes = pd.DataFrame(data=[['cls1', None], ['cls3', 'cls2 cls3']], index=[0,2], columns=['A', 'C'])\n", + "df.style.\\\n", + " set_table_attributes('class=\"table-cls\"').\\\n", + " set_td_classes(css_classes)" ] }, { @@ -314,13 +315,10 @@ "outputs": [], "source": [ "def color_negative_red(val):\n", - " \"\"\"\n", - " Takes a scalar and returns a string with\n", - " the css property `'color: red'` for negative\n", - " strings, black otherwise.\n", - " \"\"\"\n", - " color = 'red' if val < 0 else 'black'\n", - " return 'color: %s' % color" + " \"\"\"Color negative scalars red.\"\"\"\n", + " css = 'color: red;'\n", + " if val < 0: return css\n", + " return None" ] }, { @@ -368,11 +366,9 @@ "outputs": [], "source": [ "def highlight_max(s):\n", - " '''\n", - " highlight the maximum in a Series yellow.\n", - " '''\n", - " is_max = s == s.max()\n", - " return ['background-color: yellow' if v else '' for v in is_max]" + " \"\"\"Highlight the maximum in a Series bold-orange.\"\"\"\n", + " css = 'background-color: orange; font-weight: bold;'\n", + " return np.where(s == np.nanmax(s.values), css, None)" ] }, { @@ -384,11 +380,20 @@ "df.style.apply(highlight_max)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.apply(highlight_max, axis=1)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In this case the input is a `Series`, one column at a time.\n", + "In this case the input is a `Series`, one column (or row) at a 
time.\n", "Notice that the output shape of `highlight_max` matches the input shape, an array with `len(s)` items." ] }, @@ -406,8 +411,8 @@ "outputs": [], "source": [ "def compare_col(s, comparator=None):\n", - " attr = 'background-color: #00BFFF;'\n", - " return np.where(s < comparator, attr, '')" + " css = 'background-color: #00BFFF;'\n", + " return np.where(s < comparator, css, None)" ] }, { @@ -442,41 +447,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Above we used `Styler.apply` to pass in each column one at a time.\n", + "Above we used `Styler.apply` to pass in each column (or row) one at a time.\n", "\n", "*Debugging Tip*: If you're having trouble writing your style function, try just passing it into DataFrame.apply. Internally, Styler.apply uses DataFrame.apply so the result should be the same.\n", "\n", "What if you wanted to highlight just the maximum value in the entire table?\n", - "Use `.apply(function, axis=None)` to indicate that your function wants the entire table, not one column or row at a time. Let's try that next.\n", - "\n", - "We'll rewrite our `highlight-max` to handle either Series (from `.apply(axis=0 or 1)`) or DataFrames (from `.apply(axis=None)`). We'll also allow the color to be adjustable, to demonstrate that `.apply`, and `.applymap` pass along keyword arguments." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def highlight_max(data, color='yellow'):\n", - " '''\n", - " highlight the maximum in a Series or DataFrame\n", - " '''\n", - " attr = 'background-color: {}'.format(color)\n", - " if data.ndim == 1: # Series from .apply(axis=0) or axis=1\n", - " is_max = data == data.max()\n", - " return [attr if v else '' for v in is_max]\n", - " else: # from .apply(axis=None)\n", - " is_max = data == data.max().max()\n", - " return pd.DataFrame(np.where(is_max, attr, ''),\n", - " index=data.index, columns=data.columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When using ``Styler.apply(func, axis=None)``, the function must return a DataFrame with the same index and column labels." + "Use `.apply(function, axis=None)` to indicate that your function wants the entire table, not one column or row at a time. In this case the return must be a DataFrame or ndarray of the same shape as the input. Let's try that next. " ] }, { @@ -485,7 +461,7 @@ "metadata": {}, "outputs": [], "source": [ - "s = df.style.apply(highlight_max, color='darkorange', axis=None)\n", + "s = df.style.apply(highlight_max, axis=None)\n", "s" ] }, diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 75bca020fd78f..60e3cd9bb344f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -53,8 +53,9 @@ Other enhancements - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) - :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. 
``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`) - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`) -- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`) +- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`) - :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) +- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases where ``axis`` is ``0``, ``1`` or ``None`` (:issue:`39359`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. 
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3d9eb4e96f78a..8db067cbc4e8f 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -846,11 +846,19 @@ def _apply( else: result = func(data, **kwargs) if not isinstance(result, pd.DataFrame): - raise TypeError( - f"Function {repr(func)} must return a DataFrame when " - f"passed to `Styler.apply` with axis=None" - ) - if not ( + if not isinstance(result, np.ndarray): + raise TypeError( + f"Function {repr(func)} must return a DataFrame or ndarray " + f"when passed to `Styler.apply` with axis=None" + ) + if not (data.shape == result.shape): + raise ValueError( + f"Function {repr(func)} returned ndarray with wrong shape.\n" + f"Result has shape: {result.shape}\n" + f"Expected shape: {data.shape}" + ) + result = DataFrame(result, index=data.index, columns=data.columns) + elif not ( result.index.equals(data.index) and result.columns.equals(data.columns) ): raise ValueError( @@ -858,13 +866,11 @@ def _apply( f"index and columns as the input" ) - result_shape = result.shape - expected_shape = self.data.loc[subset].shape - if result_shape != expected_shape: + if result.shape != data.shape: raise ValueError( f"Function {repr(func)} returned the wrong shape.\n" f"Result has shape: {result.shape}\n" - f"Expected shape: {expected_shape}" + f"Expected shape: {data.shape}" ) self._update_ctx(result) return self @@ -877,7 +883,7 @@ def apply( **kwargs, ) -> Styler: """ - Apply a function column-wise, row-wise, or table-wise. + Apply a CSS-styling function column-wise, row-wise, or table-wise. Updates the HTML representation with the result. @@ -887,7 +893,10 @@ def apply( ``func`` should take a Series or DataFrame (depending on ``axis``), and return an object with the same shape. Must return a DataFrame with identical index and - column labels when ``axis=None``. + column labels or an ndarray with same shape as input when ``axis=None``. + + .. 
versionchanged:: 1.3.0 + axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once @@ -904,9 +913,11 @@ def apply( Notes ----- - The output shape of ``func`` should match the input, i.e. if + The output of ``func`` should be elements having CSS style as string or, + if nothing is to be applied to that element, an empty string or ``None``. + The output shape must match the input, i.e. if ``x`` is the input row, column, or table (depending on ``axis``), - then ``func(x).shape == x.shape`` should be true. + then ``func(x).shape == x.shape`` should be ``True``. This is similar to ``DataFrame.apply``, except that ``axis=None`` applies the function to the entire DataFrame at once, @@ -914,12 +925,12 @@ def apply( Examples -------- - >>> def highlight_max(x): - ... return ['background-color: yellow' if v == x.max() else '' - for v in x] - ... + >>> def highlight_max(x, color): + ... return np.where(x == np.nanmax(x.values), f"color: {color};", None) >>> df = pd.DataFrame(np.random.randn(5, 2)) - >>> df.style.apply(highlight_max) + >>> df.style.apply(highlight_max, color='red') + >>> df.style.apply(highlight_max, color='blue', axis=1) + >>> df.style.apply(highlight_max, color='green', axis=None) """ self._todo.append( (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) @@ -937,7 +948,7 @@ def _applymap(self, func: Callable, subset=None, **kwargs) -> Styler: def applymap(self, func: Callable, subset=None, **kwargs) -> Styler: """ - Apply a function elementwise. + Apply a CSS-styling function elementwise. Updates the HTML representation with the result. @@ -959,6 +970,18 @@ def applymap(self, func: Callable, subset=None, **kwargs) -> Styler: -------- Styler.where: Updates the HTML representation with a style which is selected in accordance with the return value of a function. 
+ + Notes + ----- + The output of ``func`` should be a CSS style as string or, if nothing is to be + applied, an empty string or ``None``. + + Examples + -------- + >>> def color_negative(v, color): + ... return f"color: {color};" if v < 0 else None + >>> df = pd.DataFrame(np.random.randn(5, 2)) + >>> df.style.applymap(color_negative, color='red') """ self._todo.append( (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 0bd0c5bd87761..a154e51f68dba 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1395,12 +1395,19 @@ def test_bad_apply_shape(self): with pytest.raises(ValueError, match=msg): df.style._apply(lambda x: ["", "", ""], axis=1) + msg = "returned ndarray with wrong shape" + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: np.array([[""], [""]]), axis=None) + def test_apply_bad_return(self): def f(x): return "" df = DataFrame([[1, 2], [3, 4]]) - msg = "must return a DataFrame when passed to `Styler.apply` with axis=None" + msg = ( + "must return a DataFrame or ndarray when passed to `Styler.apply` " + "with axis=None" + ) with pytest.raises(TypeError, match=msg): df.style._apply(f, axis=None)