diff --git a/doc/source/_static/style/hq_ax1.png b/doc/source/_static/style/hq_ax1.png new file mode 100644 index 0000000000000..95d840b7c8f99 Binary files /dev/null and b/doc/source/_static/style/hq_ax1.png differ diff --git a/doc/source/_static/style/hq_axNone.png b/doc/source/_static/style/hq_axNone.png new file mode 100644 index 0000000000000..40a33b194e640 Binary files /dev/null and b/doc/source/_static/style/hq_axNone.png differ diff --git a/doc/source/_static/style/hq_props.png b/doc/source/_static/style/hq_props.png new file mode 100644 index 0000000000000..1f11749096690 Binary files /dev/null and b/doc/source/_static/style/hq_props.png differ diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 85d9acff353be..bba71b0d62e92 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -119,7 +119,9 @@ to accept more universal CSS language for arguments, such as ``'color:red;'`` in to allow custom CSS highlighting instead of default background coloring (:issue:`40242`). Enhancements to other built-in methods include extending the :meth:`.Styler.background_gradient` method to shade elements based on a given gradient map and not be restricted only to -values in the DataFrame (:issue:`39930` :issue:`22727` :issue:`28901`). +values in the DataFrame (:issue:`39930` :issue:`22727` :issue:`28901`). Additional +built-in methods such as :meth:`.Styler.highlight_between` and :meth:`.Styler.highlight_quantile` +have been added (:issue:`39821` and :issue:`40926`). The :meth:`.Styler.apply` now consistently allows functions with ``ndarray`` output to allow more flexible development of UDFs when ``axis`` is ``None`` ``0`` or ``1`` (:issue:`39393`). diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f51f81d7c3504..7998365234682 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1355,6 +1355,7 @@ def highlight_null( Styler.highlight_max: Highlight the maximum with a style. 
# ---------------------------------------------------------------------------
# pandas/io/formats/style.py -- new ``Styler`` method
#
# NOTE: the same change also appends the entry
#   "Styler.highlight_quantile: Highlight values defined by a quantile with a
#   style."
# to the "See Also" sections of ``highlight_null``, ``highlight_max``,
# ``highlight_min`` and ``highlight_between``. The method below is inserted
# after ``highlight_between`` and before ``from_custom_template``.
# ---------------------------------------------------------------------------
def highlight_quantile(
    self,
    subset: IndexLabel | None = None,
    color: str = "yellow",
    axis: Axis | None = 0,
    q_left: float = 0.0,
    q_right: float = 1.0,
    interpolation: str = "linear",
    inclusive: str = "both",
    props: str | None = None,
) -> Styler:
    """
    Highlight values defined by a quantile with a style.

    .. versionadded:: 1.3.0

    Parameters
    ----------
    subset : IndexSlice, default None
        A valid slice for ``data`` to limit the style application to.
    color : str, default 'yellow'
        Background color to use for highlighting.
    axis : {0 or 'index', 1 or 'columns', None}, default 0
        Axis along which to determine and highlight quantiles. If ``None``
        quantiles are measured over the entire DataFrame. See examples.
    q_left : float, default 0
        Left bound, in [0, q_right), for the target quantile range.
    q_right : float, default 1
        Right bound, in (q_left, 1], for the target quantile range.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, \
default 'linear'
        Argument passed to ``Series.quantile`` or ``DataFrame.quantile`` for
        quantile estimation.
    inclusive : {'both', 'neither', 'left', 'right'}, default 'both'
        Identify whether quantile bounds are closed or open.
    props : str, default None
        CSS properties to use for highlighting. If ``props`` is given,
        ``color`` is not used.

    Returns
    -------
    self : Styler

    Raises
    ------
    ValueError
        If ``axis`` is not one of ``0``, ``'index'``, ``1``, ``'columns'``
        or ``None``.

    See Also
    --------
    Styler.highlight_null: Highlight missing values with a style.
    Styler.highlight_max: Highlight the maximum with a style.
    Styler.highlight_min: Highlight the minimum with a style.
    Styler.highlight_between: Highlight a defined range with a style.

    Notes
    -----
    This function does not work with ``str`` dtypes.

    Examples
    --------
    Use ``axis=None`` to apply a quantile to all data collectively

    >>> df = pd.DataFrame(np.arange(10).reshape(2,5) + 1)
    >>> df.style.highlight_quantile(axis=None, q_left=0.8, color="#fffd75")

    .. figure:: ../../_static/style/hq_axNone.png

    Or highlight quantiles row-wise or column-wise, in this case row-wise

    >>> df.style.highlight_quantile(axis=1, q_left=0.8, color="#fffd75")

    .. figure:: ../../_static/style/hq_ax1.png

    Use ``props`` instead of default background coloring

    >>> df.style.highlight_quantile(axis=None, q_left=0.2, q_right=0.8,
    ...     props='font-weight:bold;color:#e83e8c')

    .. figure:: ../../_static/style/hq_props.png
    """
    subset_ = slice(None) if subset is None else subset
    subset_ = non_reducing_slice(subset_)
    data = self.data.loc[subset_]

    kwargs = {"q": [q_left, q_right], "interpolation": interpolation}
    axis_apply: int | None
    if axis is None:
        # A single pair of bounds computed over every value in the selection.
        q = Series(data.to_numpy().ravel()).quantile(**kwargs)
        axis_apply = None
    else:
        # After the quantile is found along one axis (e.g. along rows), the
        # bounds are applied along the alternate axis (e.g. to each column).
        if axis in (0, "index"):
            axis_apply = 1
        elif axis in (1, "columns"):
            axis_apply = 0
        else:
            # Do not silently fall back to whole-frame quantiles for a typo.
            raise ValueError(
                "`axis` must be one of 0, 'index', 1, 'columns' or None, "
                f"got {axis!r}"
            )
        q = data.quantile(axis=axis, numeric_only=False, **kwargs)

    if props is None:
        props = f"background-color: {color};"
    return self.apply(
        _highlight_between,  # type: ignore[arg-type]
        axis=axis_apply,
        subset=subset,
        props=props,
        left=q.iloc[0],
        right=q.iloc[1],
        inclusive=inclusive,
    )


# ---------------------------------------------------------------------------
# pandas/tests/io/formats/style/test_highlight.py -- tests appended after
# ``test_highlight_between_inclusive``
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "kwargs",
    [
        {"q_left": 0.5, "q_right": 1, "axis": 0},  # base case
        {"q_left": 0.5, "q_right": 1, "axis": None},  # test axis
        {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]},  # test subset
        {"q_left": 0.5, "axis": 0},  # test no high
        {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1},  # test no low
        {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"},  # test props
    ],
)
def test_highlight_quantile(styler, kwargs):
    """Each kwargs variant must highlight exactly the last row of the fixture."""
    expected = {
        (2, 0): [("background-color", "yellow")],
        (2, 1): [("background-color", "yellow")],
    }
    result = styler.highlight_quantile(**kwargs)._compute().ctx
    assert result == expected


def test_highlight_quantile_invalid_axis(styler):
    """An unrecognised ``axis`` must raise instead of acting like ``axis=None``."""
    with pytest.raises(ValueError, match="axis"):
        styler.highlight_quantile(axis="rows")


@pytest.mark.skipif(np.__version__[:4] in ["1.16", "1.17"], reason="Numpy Issue #14831")
@pytest.mark.parametrize(
    "f,kwargs",
    [
        ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
        ("highlight_max", {"axis": 0, "subset": [0]}),
        ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
        ("highlight_between", {"subset": [0]}),
    ],
)
@pytest.mark.parametrize(
    "df",
    [
        DataFrame([[0, 10], [20, 30]], dtype=int),
        DataFrame([[0, 10], [20, 30]], dtype=float),
        DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"),
        DataFrame([[0, 10], [20, 30]], dtype=str),
        DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"),
    ],
)
def test_all_highlight_dtypes(f, kwargs, df):
    """Every highlight method must select cell (1, 0) for each supported dtype."""
    if f == "highlight_quantile" and isinstance(df.iloc[0, 0], str):
        return None  # quantile incompatible with str
    if f == "highlight_between":
        # Build a new dict: the parametrized ``kwargs`` object is shared by
        # every ``df`` case, so mutating it in place leaks between test runs.
        kwargs = {**kwargs, "left": df.iloc[1, 0]}  # set the range low for testing

    expected = {(1, 0): [("background-color", "yellow")]}
    result = getattr(df.style, f)(**kwargs)._compute().ctx
    assert result == expected