diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 24f307f23f435..75ef2b820290b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- Added an HTML representation (``_repr_html_``) for grouped DataFrame and Series objects (:issue:`34926`) - - diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 27b898782fbef..992dbae21f0fd 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -110,6 +110,12 @@ def use_numba_cb(key): correct auto-detection. """ +pc_max_groups_doc = """ +: int + If max_groups is exceeded, switch to truncate groupby view. 'None' value + means unlimited. +""" + pc_min_rows_doc = """ : int The numbers of rows to show in a truncated view (when `max_rows` is @@ -355,6 +361,9 @@ def is_terminal() -> bool: validator=is_instance_factory((int, type(None))), ) cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int) + cf.register_option( + "max_groups", 10, pc_max_groups_doc, validator=is_nonnegative_int + ) cf.register_option( "min_rows", 10, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8fb50db2e33f2..b3bc308f5d3e1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -113,6 +113,8 @@ class providing the base-class of operations. 
if TYPE_CHECKING: from typing import Literal +from pandas.io.formats.format import repr_html_groupby + _common_see_also = """ See Also -------- @@ -601,6 +603,9 @@ def __repr__(self) -> str: # TODO: Better repr for GroupBy object return object.__repr__(self) + def _repr_html_(self) -> str: + return repr_html_groupby(self) + @final @property def groups(self) -> dict[Hashable, np.ndarray]: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 83e0086958b9a..8df57eec177b5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2086,3 +2086,42 @@ def buffer_put_lines(buf: IO[str], lines: list[str]) -> None: if any(isinstance(x, str) for x in lines): lines = [str(x) for x in lines] buf.write("\n".join(lines)) + + +def repr_html_groupby(group_obj) -> str: + """ + Create an HTML representation for a grouped DataFrame or Series. + + Parameters + ---------- + group_obj : [DataFrameGroupBy, SeriesGroupBy] + Object to make HTML representation of. + Returns + ------- + str : + HTML representation of the input object. + """ + max_groups = get_option("display.max_groups") + max_rows = max( + 1, get_option("display.max_rows") // min(max_groups, group_obj.ngroups) + ) + group_names = list(group_obj.groups.keys()) + truncated = max_groups < group_obj.ngroups + if truncated: + n_start = (max_groups + 1) // 2 + n_end = max_groups - n_start + group_names = group_names[:n_start] + group_names[-n_end:] + repr_html_list = list() + for group_name in group_names: + if not isinstance(group_name, tuple): + group = group_obj.get_group((group_name, )) + else: + group = group_obj.get_group(group_name) + if not hasattr(group, "to_html"): + group = group.to_frame() + repr_html_list.append( + f"