pandas-dev · attack68 · Sep 18, 2021 · Sep 18, 2021 · Sep 18, 2021 · Sep 21, 2021
diff --git a/doc/source/_static/style/des_mean.png b/doc/source/_static/style/des_mean.png
diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -45,6 +45,7 @@ Style application
    Styler.set_td_classes
    Styler.set_table_styles
    Styler.set_table_attributes
+   Styler.set_descriptors
    Styler.set_tooltips
    Styler.set_caption
    Styler.set_sticky

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -216,6 +216,47 @@
     "weather_df.loc[\"2021-01-04\":\"2021-01-08\"].style.pipe(make_pretty)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Describing Data\n",
+    "\n",
+    "The data can also be explored by adding header-level calculations, using the [.set_descriptors()][descriptors] method. Here we begin with a large DataFrame, reconfigure `pandas.options` to reduce the rendered size, and add the descriptors we wish to calculate on the data.\n",
+    "\n",
+    "[descriptors]: ../reference/api/pandas.io.formats.style.Styler.set_descriptors.rst"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.options.styler.render.max_rows = 5\n",
+    "df_described = pd.DataFrame({\"A\": np.random.randn(1000), \n",
+    "                   \"B\": np.random.randint(low=-10, high=10, size=1000, dtype=\"int64\")})\n",
+    "df_described.style.set_descriptors([\n",
+    "    \"mean\",\n",
+    "    (\"mean 2dp\", lambda s: f\"{s.mean():.2f}\"),\n",
+    "    (\"std\", pd.Series.std),\n",
+    "    \"nunique\",\n",
+    "    lambda s: s.dtype,\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "outputs": [],
+   "source": [
+    "# Hidden cell to reset pandas options \n",
+    "pd.options.styler.render.max_rows = None"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1661,6 +1702,7 @@
     "  + `col<n>`, where `n` is the numeric position of the cell.\n",
     "- Blank cells include `blank`\n",
     "- Trimmed cells include `col_trim` or `row_trim`\n",
+    "- Descriptor name cells include `descriptor_name` and descriptor value cells include `descriptor_value`; both also include `descriptor<j>`, where `j` is the zero-based position of the descriptor in the list of descriptors.\n",
     "\n",
     "The structure of the `id` is `T_uuid_level<k>_row<m>_col<n>` where `level<k>` is used only on headings, and headings will only have either `row<m>` or `col<n>` whichever is needed. By default we've also prepended each row/column identifier with a UUID unique to each DataFrame so that the style from one doesn't collide with the styling from another within the same notebook or page. You can read more about the use of UUIDs in [Optimization](#Optimization).\n",
     "\n",
@@ -1675,7 +1717,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(pd.DataFrame([[1,2],[3,4]], index=['i1', 'i2'], columns=['c1', 'c2']).style.to_html())"
+    "print(pd.DataFrame([[1,2],[3,4]], index=['i1', 'i2'], columns=['c1', 'c2']).style.set_descriptors([\"mean\"]).to_html())"
    ]
   },
   {

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -20,6 +20,7 @@ Styler
 ^^^^^^
 
   - New method :meth:`.Styler.to_string` for alternative customisable output methods (:issue:`44502`)
+  - Added a new method :meth:`.Styler.set_descriptors` which allows adding customised header rows to explore and make calculations on the data, e.g. totals and counts (:issue:`43875`)
   - Various bug fixes, see below.
 
 .. _whatsnew_150.enhancements.enhancement2:

@@ -1435,6 +1435,7 @@ def _copy(self, deepcopy: bool = False) -> Styler:
         ]
         deep = [  # nested lists or dicts
             "css",
+            "descriptors",
             "_display_funcs",
             "_display_funcs_index",
             "_display_funcs_columns",
@@ -1979,6 +1980,9 @@ def export(self) -> dict[str, Any]:
 
         Can be applied to a second Styler with ``Styler.use``.
 
+        .. versionchanged:: 1.5.0
+           Adds ``descriptors`` to the exported items.
+
         Returns
         -------
         styles : dict
@@ -2000,6 +2004,7 @@ def export(self) -> dict[str, Any]:
           - Whether axes and names are hidden from the display, if unambiguous.
           - Table attributes
           - Table styles
+          - Descriptors
 
         The following attributes are considered data dependent and therefore not
         exported:
@@ -2029,6 +2034,7 @@ def export(self) -> dict[str, Any]:
             "hide_index_names": self.hide_index_names,
             "hide_column_names": self.hide_column_names,
             "css": copy.copy(self.css),
+            "descriptors": copy.copy(self.descriptors),
         }
 
     def use(self, styles: dict[str, Any]) -> Styler:
@@ -2037,6 +2043,9 @@ def use(self, styles: dict[str, Any]) -> Styler:
 
         Possibly uses styles from ``Styler.export``.
 
+        .. versionchanged:: 1.5.0
+           Adds ``descriptors`` to the used items.
+
         Parameters
         ----------
         styles : dict(str, Any)
@@ -2054,6 +2063,8 @@ def use(self, styles: dict[str, Any]) -> Styler:
               - "hide_index_names": whether index names are hidden.
               - "hide_column_names": whether column header names are hidden.
               - "css": the css class names used.
+              - "descriptors": list of descriptors, typically added with
+                ``set_descriptors``.
 
         Returns
         -------
@@ -2096,6 +2107,8 @@ def use(self, styles: dict[str, Any]) -> Styler:
         self.hide_column_names = styles.get("hide_column_names", False)
         if styles.get("css"):
             self.css = styles.get("css")  # type: ignore[assignment]
+        if styles.get("descriptors"):
+            self.set_descriptors(styles.get("descriptors"))
         return self
 
     def set_uuid(self, uuid: str) -> Styler:
@@ -2354,7 +2367,10 @@ def set_table_styles(
                                "row_trim": "row_trim",
                                "level": "level",
                                "data": "data",
-                               "blank": "blank}
+                               "blank": "blank",
+                               "descriptor": "descriptor",
+                               "descriptor_name": "descriptor_name",
+                               "descriptor_value": "descriptor_value"}
 
         Examples
         --------
@@ -2425,6 +2441,46 @@ def set_table_styles(
             self.table_styles = table_styles
         return self
 
+    def set_descriptors(
+        self, descriptors: list[str | Callable | tuple[str, Callable]] | None = None
+    ) -> Styler:
+        """
+        Add header-level calculations to the output which describe the data.
+
+        .. versionadded:: 1.5.0
+
+        Parameters
+        ----------
+        descriptors : list of str, callables or 2-tuples of str and callable
+            If a string, must name a Series method, e.g. "mean" invokes Series.mean().
+
+            If a callable is given must accept a Series and return a scalar. No name
+            will be displayed for the row.
+
+            If a 2-tuple, must be the displayed row name and a callable as above.
+
+            If None, the default, any previously set descriptors are removed.
+
+        Returns
+        -------
+        self : Styler
+
+        Examples
+        --------
+        >>> df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
+        >>> styler = df.style.set_descriptors([
+        ...     "mean",
+        ...     Series.mean,
+        ...     ("my-text", Series.mean),
+        ...     ("my-func", lambda s: s.sum()/2),
+        ... ])  # doctest: +SKIP
+
+        .. figure:: ../../_static/style/des_mean.png
+        """
+        # keep a private shallow copy so later edits to the caller's list are ignored
+        self.descriptors = list(descriptors) if descriptors is not None else []
+        return self
+
     def set_na_rep(self, na_rep: str) -> StylerRenderer:
         """
         Set the missing data representation on a ``Styler``.

@@ -115,6 +115,9 @@ def __init__(
             "level": "level",
             "data": "data",
             "blank": "blank",
+            "descriptor": "descriptor",
+            "descriptor_value": "descriptor_value",
+            "descriptor_name": "descriptor_name",
         }
 
         # add rendering variables
@@ -124,6 +127,7 @@ def __init__(
         self.hide_columns_: list = [False] * self.columns.nlevels
         self.hidden_rows: Sequence[int] = []  # sequence for specific hidden rows/cols
         self.hidden_columns: Sequence[int] = []
+        self.descriptors: list[str | Callable | tuple[str, Callable]] = []
         self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
         self.ctx_index: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
         self.ctx_columns: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
@@ -329,7 +333,9 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int):
           1) |       ..                   |       ..      |             ..            |
              |  index_blanks ...          | column_name_n |  column_headers (level_n) |
              +----------------------------+---------------+---------------------------+
-          2) |  index_names (level_0 to level_n) ...      | column_blanks ...         |
+          2) |  index_blanks ...          | descriptor    |  value by column          |
+             +----------------------------+---------------+---------------------------+
+          3) |  index_names (level_0 to level_n) ...      | column_blanks ...         |
              +----------------------------+---------------+---------------------------+
 
         Parameters
@@ -365,7 +371,12 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int):
                 )
                 head.append(header_row)
 
-        # 2) index names
+        # 2) Descriptor calcs
+        for r, descriptor in enumerate(self.descriptors):
+            descriptor_row = self._generate_descriptor_row((r, descriptor), max_cols)
+            head.append(descriptor_row)
+
+        # 3) index names
         if (
             self.data.index.names
             and com.any_not_none(*self.data.index.names)
@@ -477,6 +488,109 @@ def _generate_col_header_row(self, iter: tuple, max_cols: int, col_lengths: dict
 
         return index_blanks + column_name + column_headers
 
+    def _generate_descriptor_row(self, iter: tuple, max_cols: int):
+        """
+        Generate the row containing calculated descriptor values for columns:
+
+         +----------------------------+---------------+---------------------------+
+         |  index_blanks ...          | descriptor_i  |  value_i by col           |
+         +----------------------------+---------------+---------------------------+
+
+        Parameters
+        ----------
+        iter : tuple
+            Looping variables from outer scope: the position of the descriptor in
+            ``self.descriptors`` and the descriptor itself (a str, a callable or a
+            2-tuple of str and callable).
+        max_cols : int
+            Permissible number of columns
+
+        Returns
+        -------
+        list of elements
+            The index blanks, then the descriptor name cell, then one value cell
+            per column, ending with a trimming cell if ``max_cols`` is exceeded.
+        """
+
+        r, descriptor = iter
+
+        # number of index blanks is governed by number of hidden index levels
+        index_blanks = [
+            _element("th", self.css["blank"], self.css["blank_value"], True)
+        ] * (self.index.nlevels - sum(self.hide_index_) - 1)
+
+        # normalise the three accepted descriptor forms into (name, func)
+        if isinstance(descriptor, str):
+            name: str | None = descriptor
+            func: Callable = getattr(Series, descriptor)
+        elif isinstance(descriptor, tuple):
+            name, func = descriptor[0], descriptor[1]
+        else:
+            name, func = None, descriptor
+
+        # descriptor values are formatted with the global `styler.format` options
+        display_func: Callable = _maybe_wrap_formatter(
+            formatter=None,  # use _default_formatter
+            decimal=get_option("styler.format.decimal"),
+            thousands=get_option("styler.format.thousands"),
+            precision=get_option("styler.format.precision"),
+            na_rep=get_option("styler.format.na_rep"),
+            escape=get_option("styler.format.escape"),
+        )
+
+        # the name is replaced by a blank cell if absent or column names are hidden
+        show_name = name is not None and not self.hide_column_names
+        base_css = f"{self.css['descriptor_name']} {self.css['descriptor']}{r}"
+        descriptor_name = [
+            _element(
+                "th",
+                base_css if show_name else f"{self.css['blank']} {base_css}",
+                name if show_name else self.css["blank_value"],
+                not all(self.hide_index_),
+            )
+        ]
+
+        value_css = f"{self.css['descriptor_value']} {self.css['descriptor']}{r}"
+        descriptor_values, visible_col_count = [], 0
+        for c, col in enumerate(self.columns):
+            header_element_visible = c not in self.hidden_columns
+            if header_element_visible:
+                visible_col_count += 1
+                if visible_col_count > max_cols:
+                    # add an extra column with `...` value to indicate trimming;
+                    # checked before calling `func` so nothing is computed for it
+                    descriptor_values.append(
+                        _element(
+                            "th",
+                            f"{value_css} {self.css['col_trim']}",
+                            "...",
+                            True,
+                            attributes="",
+                        )
+                    )
+                    break
+                try:
+                    header_element_value = func(self.data[col])
+                except Exception:
+                    # best effort: a descriptor which cannot be computed for a
+                    # column is rendered as a blank cell
+                    header_element_value = self.css["blank_value"]
+            else:
+                header_element_value = None
+
+            descriptor_values.append(
+                _element(
+                    "th",
+                    f"{value_css} {self.css['col']}{c}",
+                    header_element_value,
+                    header_element_visible,
+                    display_value=display_func(header_element_value),
+                    attributes="",
+                )
+            )
+
+        return index_blanks + descriptor_name + descriptor_values
+
     def _generate_index_names_row(self, iter: tuple, max_cols: int, col_lengths: dict):
         """
         Generate the row containing index names
@@ -1418,7 +1532,7 @@ def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any:
     """
     if isinstance(x, (float, complex)):
         return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}"
-    elif isinstance(x, int):
+    elif isinstance(x, (int, np.int64)):
         return f"{x:,.0f}" if thousands else f"{x:.0f}"
     return x
 
@@ -1433,7 +1547,7 @@ def _wrap_decimal_thousands(
     """
 
     def wrapper(x):
-        if isinstance(x, (float, complex, int)):
+        if isinstance(x, (float, complex, int, np.int64)):
             if decimal != "." and thousands is not None and thousands != ",":
                 return (
                     formatter(x)

diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py
@@ -434,3 +434,21 @@ def test_1level_multiindex():
     assert ctx["body"][0][0]["is_visible"] is True
     assert ctx["body"][1][0]["display_value"] == "2"
     assert ctx["body"][1][0]["is_visible"] is True
+
+
+def test_format_descriptors(styler):
+    # descriptor values are expected to be displayed using ``styler.format`` options
+    opts = ["styler.format.precision", 5, "styler.format.decimal", "*"]
+    opts += ["styler.format.thousands", "_"]
+    with option_context(*opts):
+        styler.set_descriptors([lambda s: s.sum() + 1000])
+        ctx = styler._translate(True, True)
+
+    # compare cells 1 and 2 of the second header row with the expected values
+    expected = [
+        {"value": 1001, "display_value": "1_001"},
+        {"value": 998.163, "display_value": "998*16300"},
+    ]
+    descriptor_row = ctx["head"][1]
+    for pos, exp in enumerate(expected, start=1):
+        assert exp.items() <= descriptor_row[pos].items()