From 5e1cb8c3756df2cfcd08c743583281e159171d2c Mon Sep 17 00:00:00 2001 From: JulianW Date: Mon, 22 Jun 2020 03:07:34 +0200 Subject: [PATCH 01/30] Add html repr for groupby dataframe and series --- pandas/core/groupby/groupby.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 02f7f605a7605..fd6d778c69db3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -33,6 +33,7 @@ class providing the base-class of operations. import numpy as np from pandas._config.config import option_context +from pandas._config import get_option from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby @@ -542,6 +543,17 @@ def __repr__(self) -> str: # TODO: Better repr for GroupBy object return object.__repr__(self) + def _repr_html_(self) -> str: + html_text = "" + for idx, df_group in self: + if not hasattr(df_group, "to_html"): + df_group = df_group.to_frame() + html_text += f"

Group Key: {idx}

" + html_text += df_group.to_html( + max_rows=get_option("display.max_rows") // self.ngroups + ) + return html_text + def _assure_grouper(self): """ We create the grouper on instantiation sub-classes may have a From 9c3df8a0167cc4c77c8ef86e30df6fc6d82fae59 Mon Sep 17 00:00:00 2001 From: JulianW Date: Mon, 22 Jun 2020 10:57:11 +0200 Subject: [PATCH 02/30] Sort imports with isort in groupby.py --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fd6d778c69db3..0329707f13cf3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,8 +32,8 @@ class providing the base-class of operations. import numpy as np -from pandas._config.config import option_context from pandas._config import get_option +from pandas._config.config import option_context from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby From 4a3911a6f4547d5e6c198ac11c8ccade30213c77 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 13:48:47 +0200 Subject: [PATCH 03/30] Improve variable naming --- pandas/core/groupby/groupby.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0329707f13cf3..a922799b8f976 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -544,15 +544,15 @@ def __repr__(self) -> str: return object.__repr__(self) def _repr_html_(self) -> str: - html_text = "" - for idx, df_group in self: - if not hasattr(df_group, "to_html"): - df_group = df_group.to_frame() - html_text += f"

Group Key: {idx}

" - html_text += df_group.to_html( + repr_html = "" + for group_name, group in self: + if not hasattr(group, "to_html"): + group = group.to_frame() + repr_html += f"

Group Key: {group_name}

" + repr_html += group.to_html( max_rows=get_option("display.max_rows") // self.ngroups ) - return html_text + return repr_html def _assure_grouper(self): """ From 46f5353e5ff1f362733a9dde8d9a453bd663b4c1 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 14:01:48 +0200 Subject: [PATCH 04/30] Add display.max_groups to config --- pandas/core/config_init.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5089445c79897..b574a72dbc8e7 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -91,6 +91,12 @@ def use_numexpr_cb(key): correct auto-detection. """ +pc_max_groups_doc = """ +: int + If max_groups is exceeded, switch to truncate groupby view. 'None' value + means unlimited. +""" + pc_min_rows_doc = """ : int The numbers of rows to show in a truncated view (when `max_rows` is @@ -336,6 +342,7 @@ def is_terminal() -> bool: validator=is_instance_factory((int, type(None))), ) cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int) + cf.register_option("max_groups", 10, pc_max_groups_doc, validator=is_nonnegative_int) cf.register_option( "min_rows", 10, From c840c29a0358c1abf109eb3bbca06528e9305fea Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 14:46:15 +0200 Subject: [PATCH 05/30] Add group display truncation for too many groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For 20,000 groups the function takes 260 ms ± 12.2 ms - Alternative approach, how to get in the dots in the middle elegantly? 
def _repr_html_(self) -> str: group_names = list(self.groups.keys()) max_groups = get_option("display.max_groups") if max_groups < self.ngroups: n_start = (max_groups + 1) // 2 n_end = max_groups - n_start group_names = group_names[:n_start] + group_names[-n_end:] repr_html = "" for group_name in group_names: group = self.groups[group_name] if not hasattr(group, "to_html"): group = group.to_frame() repr_html += f"

Group Key: {group_name}

" repr_html += group.to_html( max_rows=get_option("display.max_rows") // self.ngroups ) return repr_html --- pandas/core/groupby/groupby.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a922799b8f976..9e780b6772a32 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -544,14 +544,22 @@ def __repr__(self) -> str: return object.__repr__(self) def _repr_html_(self) -> str: + max_groups = get_option("display.max_groups") + ngroups = self.ngroups + n_start = (max_groups + 1) // 2 + n_end = max_groups - n_start repr_html = "" - for group_name, group in self: - if not hasattr(group, "to_html"): - group = group.to_frame() - repr_html += f"

Group Key: {group_name}

" - repr_html += group.to_html( - max_rows=get_option("display.max_rows") // self.ngroups - ) + truncated = ngroups > max_groups + for k, (group_name, group) in enumerate(self): + if not truncated or (n_start > k or k >= ngroups - n_end): + if not hasattr(group, "to_html"): + group = group.to_frame() + repr_html += f"

Group Key: {group_name}

" + repr_html += group.to_html( + max_rows=max(1, get_option("display.max_rows") // self.ngroups) + ) + elif k == max_groups // 2: + repr_html += "

...

" return repr_html def _assure_grouper(self): From 139bdc68000f597b378dbf38221b1e012befd187 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 15:13:21 +0200 Subject: [PATCH 06/30] Implement faster and more scalable list variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - takes 8.97 ms ± 352 µs for 20,000 groups and is not that dependent on number of groups --- pandas/core/groupby/groupby.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9e780b6772a32..f32a55b7b7121 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -545,22 +545,24 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: max_groups = get_option("display.max_groups") - ngroups = self.ngroups - n_start = (max_groups + 1) // 2 - n_end = max_groups - n_start - repr_html = "" - truncated = ngroups > max_groups - for k, (group_name, group) in enumerate(self): - if not truncated or (n_start > k or k >= ngroups - n_end): - if not hasattr(group, "to_html"): - group = group.to_frame() - repr_html += f"

Group Key: {group_name}

" - repr_html += group.to_html( - max_rows=max(1, get_option("display.max_rows") // self.ngroups) - ) - elif k == max_groups // 2: - repr_html += "

...

" - return repr_html + max_rows = max(1, get_option("display.max_rows") // self.ngroups) + group_names = list(self.groups.keys()) + truncated = max_groups < self.ngroups + if truncated: + n_start = (max_groups + 1) // 2 + n_end = max_groups - n_start + group_names = group_names[:n_start] + group_names[-n_end:] + repr_html_list = list() + for group_name in group_names: + group = self.get_group(group_name) + if not hasattr(group, "to_html"): + group = group.to_frame() + repr_html_list.append( + f"

Group Key: {group_name}

\n{group.to_html(max_rows=max_rows)}" + ) + if truncated: + repr_html_list.insert(max_groups // 2, "

...

") + return "\n".join(repr_html_list) def _assure_grouper(self): """ From 1020be9f77d8bdecfe8c6649f04dd1d4de7a35e5 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 15:15:29 +0200 Subject: [PATCH 07/30] Black config_init --- pandas/core/config_init.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index b574a72dbc8e7..9299e17c13ffa 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -342,7 +342,9 @@ def is_terminal() -> bool: validator=is_instance_factory((int, type(None))), ) cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int) - cf.register_option("max_groups", 10, pc_max_groups_doc, validator=is_nonnegative_int) + cf.register_option( + "max_groups", 10, pc_max_groups_doc, validator=is_nonnegative_int + ) cf.register_option( "min_rows", 10, From 2e4a6eec1f543d40ea318c97a161773195810eb2 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 25 Jul 2020 15:18:54 +0200 Subject: [PATCH 08/30] Fix bug which displayed too few rows - max rows per group was calculated with the number of groups in the groupby and did not consider max_groups setting --- pandas/core/groupby/groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f32a55b7b7121..fbb23e0d56c1c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -545,7 +545,9 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: max_groups = get_option("display.max_groups") - max_rows = max(1, get_option("display.max_rows") // self.ngroups) + max_rows = max( + 1, get_option("display.max_rows") // min(max_groups, self.ngroups) + ) group_names = list(self.groups.keys()) truncated = max_groups < self.ngroups if truncated: From ea2f1518204570815782e3a5749f13c283230ae6 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 26 Jul 2020 10:43:56 +0200 Subject: [PATCH 09/30] 
Add test for groupby representation - load dataframes from html output and compare them --- pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0d040b8e6955a..4d25c8b318695 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2047,3 +2047,27 @@ def test_groups_repr_truncates(max_seq_items, expected): result = df.groupby(np.array(df.a)).groups.__repr__() assert result == expected + + +def test_groupby_repr(): + """ + This test only works when all groups and all rows in a group are shown in + html output. + """ + n_groups = 5 + length = n_groups * 5 + df = pd.DataFrame( + { + "A": range(length), + "B": range(0, length * 2, 2), + "C": list(range(n_groups)) * (length // n_groups), + } + ) + + df_groupby = df.groupby("C") + html_groupby = df_groupby._repr_html_() + + dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) + + for k, (group_name, df_group) in enumerate(df_groupby): + tm.assert_frame_equal(dfs_from_html[k], df_group) From 2443b807784eb4a73e5b3b5719eb0906516ce951 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 26 Jul 2020 10:48:08 +0200 Subject: [PATCH 10/30] Delete trailing whitespace in comment - https://github.com/pandas-dev/pandas/pull/34926/checks?check_run_id=909776147#step:6:16 - black did not pick it up --- pandas/core/config_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 9299e17c13ffa..e5c1ad99185cd 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -93,7 +93,7 @@ def use_numexpr_cb(key): pc_max_groups_doc = """ : int - If max_groups is exceeded, switch to truncate groupby view. 'None' value + If max_groups is exceeded, switch to truncate groupby view. 'None' value means unlimited. 
""" From 913afb0006b5ef6a7f2c818779aaf3e5c8940d2e Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 26 Jul 2020 11:19:54 +0200 Subject: [PATCH 11/30] Add test cases for truncated rows and groups --- pandas/tests/groupby/test_groupby.py | 64 +++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4d25c8b318695..a98d5a4854e08 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2051,8 +2051,7 @@ def test_groups_repr_truncates(max_seq_items, expected): def test_groupby_repr(): """ - This test only works when all groups and all rows in a group are shown in - html output. + All groups and all rows in a group are shown in html output. """ n_groups = 5 length = n_groups * 5 @@ -2071,3 +2070,64 @@ def test_groupby_repr(): for k, (group_name, df_group) in enumerate(df_groupby): tm.assert_frame_equal(dfs_from_html[k], df_group) + + +def test_groupby_repr_truncated_group(): + """ + In the groups not all rows are shown in html output. + """ + n_groups = 10 + length = n_groups * 20 + + df = pd.DataFrame( + { + "A": range(length), + "B": range(0, length * 2, 2), + "C": list(range(n_groups)) * (length // n_groups), + } + ) + + df_groupby = df.groupby("C") + html_groupby = df_groupby._repr_html_() + + dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) + + # For each group only test first and last row + for k, (group_name, df_group) in enumerate(df_groupby): + dtype = df_group.iloc[0].dtype + tm.assert_series_equal( + dfs_from_html[k].iloc[0].astype(dtype), df_group.iloc[0], check_names=False + ) + tm.assert_series_equal( + dfs_from_html[k].iloc[-1].astype(dtype), + df_group.iloc[-1], + check_names=False, + ) + + +def test_groupby_repr_not_all_groups(): + """ + Not all groups are shown in html output. 
+ """ + n_groups = 30 + length = n_groups * 5 + df = pd.DataFrame( + { + "A": range(length), + "B": range(0, length * 2, 2), + "C": list(range(n_groups)) * (length // n_groups), + } + ) + + df_groupby = df.groupby("C") + html_groupby = df_groupby._repr_html_() + + dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) + + # Test first and last group + tm.assert_frame_equal( + dfs_from_html[0], df_groupby.get_group(tuple(df_groupby.groups.keys())[0]) + ) + tm.assert_frame_equal( + dfs_from_html[-1], df_groupby.get_group(tuple(df_groupby.groups.keys())[-1]) + ) From d85fc63ab43021581ca88bd0e28db9ad52955553 Mon Sep 17 00:00:00 2001 From: JulianW Date: Thu, 3 Sep 2020 19:39:46 +0200 Subject: [PATCH 12/30] Skip test if lxml is not installed --- pandas/tests/groupby/test_groupby.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4e0fe37971054..daa3b513c596c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,6 +6,7 @@ import pytest from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv @@ -2057,6 +2058,7 @@ def test_groups_repr_truncates(max_seq_items, expected): assert result == expected +@td.skip_if_no("lxml") def test_groupby_repr(): """ All groups and all rows in a group are shown in html output. @@ -2080,6 +2082,7 @@ def test_groupby_repr(): tm.assert_frame_equal(dfs_from_html[k], df_group) +@td.skip_if_no("lxml") def test_groupby_repr_truncated_group(): """ In the groups not all rows are shown in html output. @@ -2113,6 +2116,7 @@ def test_groupby_repr_truncated_group(): ) +@td.skip_if_no("lxml") def test_groupby_repr_not_all_groups(): """ Not all groups are shown in html output. 
From 778d90d0ef0179bfa88f236ccdef95843f25e14e Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 5 Sep 2020 18:07:07 +0200 Subject: [PATCH 13/30] Move html repr function to io/formats/format.py - from groupby module - https://github.com/pandas-dev/pandas/pull/34926#discussion_r483957300 --- pandas/core/groupby/groupby.py | 25 ++++--------------------- pandas/io/formats/format.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9b20dfd8cec7c..5abe6fcdba6a3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -68,6 +68,8 @@ class providing the base-class of operations. from pandas.core.sorting import get_group_index_sorter from pandas.core.util.numba_ import NUMBA_FUNC_CACHE +from pandas.io.formats.format import repr_html_groupby + _common_see_also = """ See Also -------- @@ -550,27 +552,8 @@ def __repr__(self) -> str: return object.__repr__(self) def _repr_html_(self) -> str: - max_groups = get_option("display.max_groups") - max_rows = max( - 1, get_option("display.max_rows") // min(max_groups, self.ngroups) - ) - group_names = list(self.groups.keys()) - truncated = max_groups < self.ngroups - if truncated: - n_start = (max_groups + 1) // 2 - n_end = max_groups - n_start - group_names = group_names[:n_start] + group_names[-n_end:] - repr_html_list = list() - for group_name in group_names: - group = self.get_group(group_name) - if not hasattr(group, "to_html"): - group = group.to_frame() - repr_html_list.append( - f"

Group Key: {group_name}

\n{group.to_html(max_rows=max_rows)}" - ) - if truncated: - repr_html_list.insert(max_groups // 2, "

...

") - return "\n".join(repr_html_list) + return repr_html_groupby(self) + def _assure_grouper(self): """ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 81990b3d505e1..539abebdb77f7 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1990,3 +1990,27 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: if any(isinstance(x, str) for x in lines): lines = [str(x) for x in lines] buf.write("\n".join(lines)) + + +def repr_html_groupby(group_obj): + max_groups = get_option("display.max_groups") + max_rows = max( + 1, get_option("display.max_rows") // min(max_groups, group_obj.ngroups) + ) + group_names = list(group_obj.groups.keys()) + truncated = max_groups < group_obj.ngroups + if truncated: + n_start = (max_groups + 1) // 2 + n_end = max_groups - n_start + group_names = group_names[:n_start] + group_names[-n_end:] + repr_html_list = list() + for group_name in group_names: + group = group_obj.get_group(group_name) + if not hasattr(group, "to_html"): + group = group.to_frame() + repr_html_list.append( + f"

Group Key: {group_name}

\n{group.to_html(max_rows=max_rows)}" + ) + if truncated: + repr_html_list.insert(max_groups // 2, "

...

") + return "\n".join(repr_html_list) From 97360073a3ce1260a170b762dfc2a1d7ba0a5977 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 5 Sep 2020 18:15:45 +0200 Subject: [PATCH 14/30] Add doc string and return type annotation --- pandas/io/formats/format.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 539abebdb77f7..d3e133216db0c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1992,7 +1992,19 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: buf.write("\n".join(lines)) -def repr_html_groupby(group_obj): +def repr_html_groupby(group_obj) -> str: + """ + Create a HTML representation for a grouped dataframe or series. + + Parameters + ---------- + group_obj : [DataFrameGroupBy, SeriesGroupBy] + A grouped dataframe or a series of a grouped dataframe. + Returns + ------- + str : + HTML representation of the input object. + """ max_groups = get_option("display.max_groups") max_rows = max( 1, get_option("display.max_rows") // min(max_groups, group_obj.ngroups) From 7f1937c0417ac83926bce1c83779cb8135be0f1b Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 5 Sep 2020 19:50:38 +0200 Subject: [PATCH 15/30] Add type annotations for input arg --- pandas/io/formats/format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d3e133216db0c..f23e9df1f4d87 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -73,6 +73,7 @@ if TYPE_CHECKING: from pandas import Categorical, DataFrame, Series + from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy FormattersType = Union[ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] @@ -1992,7 +1993,7 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: buf.write("\n".join(lines)) -def repr_html_groupby(group_obj) -> str: +def repr_html_groupby(group_obj: 
Union[DataFrameGroupBy, SeriesGroupBy]) -> str: """ Create a HTML representation for a grouped dataframe or series. From 388f35d2ae1ac5f8418968ae02d129f62439cc83 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 5 Sep 2020 19:52:27 +0200 Subject: [PATCH 16/30] Fix linting errors --- pandas/core/groupby/groupby.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5abe6fcdba6a3..98dfe9328866b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,7 +32,6 @@ class providing the base-class of operations. import numpy as np -from pandas._config import get_option from pandas._config.config import option_context from pandas._libs import Timestamp, lib @@ -554,7 +553,6 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: return repr_html_groupby(self) - def _assure_grouper(self): """ We create the grouper on instantiation sub-classes may have a From 228e65999e10289b8b34eb68f5de1ee9ff253fb9 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 5 Sep 2020 20:58:38 +0200 Subject: [PATCH 17/30] Move import to the correct location --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f23e9df1f4d87..ac09b6b189024 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -70,10 +70,10 @@ from pandas.io.common import stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing +from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy if TYPE_CHECKING: from pandas import Categorical, DataFrame, Series - from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy FormattersType = Union[ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] From dee1220432c883c086360b0aa7dd1b3c22084df2 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sat, 3 Oct 2020 15:03:24 +0200 Subject: [PATCH 18/30] Remove pandas type 
annotations because of circular import https://github.com/pandas-dev/pandas/pull/34926/checks --- pandas/io/formats/format.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ac09b6b189024..d3e133216db0c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -70,7 +70,6 @@ from pandas.io.common import stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing -from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy if TYPE_CHECKING: from pandas import Categorical, DataFrame, Series @@ -1993,7 +1992,7 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: buf.write("\n".join(lines)) -def repr_html_groupby(group_obj: Union[DataFrameGroupBy, SeriesGroupBy]) -> str: +def repr_html_groupby(group_obj) -> str: """ Create a HTML representation for a grouped dataframe or series. From 2c5c3947ccba7f19368a02209e3aa63ea1735d0b Mon Sep 17 00:00:00 2001 From: JulianW Date: Wed, 28 Oct 2020 17:42:25 +0100 Subject: [PATCH 19/30] Remove inconsistent use of pd namespace in tests - https://github.com/pandas-dev/pandas/pull/34926/checks?check_run_id=1309077928 --- pandas/tests/groupby/test_groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7ad53c88e626c..76c993a482cc7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2067,7 +2067,7 @@ def test_groupby_repr(): """ n_groups = 5 length = n_groups * 5 - df = pd.DataFrame( + df = DataFrame( { "A": range(length), "B": range(0, length * 2, 2), @@ -2092,7 +2092,7 @@ def test_groupby_repr_truncated_group(): n_groups = 10 length = n_groups * 20 - df = pd.DataFrame( + df = DataFrame( { "A": range(length), "B": range(0, length * 2, 2), @@ -2125,7 +2125,7 @@ def test_groupby_repr_not_all_groups(): """ n_groups = 30 length = n_groups * 5 - df = 
pd.DataFrame( + df = DataFrame( { "A": range(length), "B": range(0, length * 2, 2), From 669c047fd886455e852e98904ce0830d2dc95a0f Mon Sep 17 00:00:00 2001 From: JulianW Date: Fri, 30 Oct 2020 16:24:48 +0100 Subject: [PATCH 20/30] Fix typo and capitalize pandas objs correctly --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 58c2c17b4bb77..0c4732996e1d3 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2024,12 +2024,12 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: def repr_html_groupby(group_obj) -> str: """ - Create a HTML representation for a grouped dataframe or series. + Create an HTML representation for a grouped DataFrame or Series. Parameters ---------- group_obj : [DataFrameGroupBy, SeriesGroupBy] - A grouped dataframe or a series of a grouped dataframe. + Object to make HTML representation of. Returns ------- str : From 8a7529985dba529bd5d71941dc5c940491fd9d1d Mon Sep 17 00:00:00 2001 From: JulianW Date: Fri, 30 Oct 2020 16:37:20 +0100 Subject: [PATCH 21/30] Change docstring to comment in groupby repr test - add link to Github Pull Request --- pandas/tests/groupby/test_groupby.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 76c993a482cc7..6d0ab3f1f879c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2062,9 +2062,7 @@ def test_groups_repr_truncates(max_seq_items, expected): @td.skip_if_no("lxml") def test_groupby_repr(): - """ - All groups and all rows in a group are shown in html output. - """ + # GH 34926 - All groups and all rows in a group are shown in html output. 
n_groups = 5 length = n_groups * 5 df = DataFrame( @@ -2086,9 +2084,7 @@ def test_groupby_repr(): @td.skip_if_no("lxml") def test_groupby_repr_truncated_group(): - """ - In the groups not all rows are shown in html output. - """ + # GH 34926 - In the groups not all rows are shown in html output. n_groups = 10 length = n_groups * 20 @@ -2120,9 +2116,7 @@ def test_groupby_repr_truncated_group(): @td.skip_if_no("lxml") def test_groupby_repr_not_all_groups(): - """ - Not all groups are shown in html output. - """ + # GH 34926 - Not all groups are shown in html output. n_groups = 30 length = n_groups * 5 df = DataFrame( From b36177df1d7a2d13aed264dd78d6602bb4fc3dc9 Mon Sep 17 00:00:00 2001 From: JulianW Date: Fri, 30 Oct 2020 16:38:06 +0100 Subject: [PATCH 22/30] Add additional explanation in groupby_repr test - why only the first/last rows/groups are tested --- pandas/tests/groupby/test_groupby.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6d0ab3f1f879c..235236f197e33 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2102,6 +2102,11 @@ def test_groupby_repr_truncated_group(): dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) # For each group only test first and last row + # Those rows will always be shown. No logic is needed to calculate how many + # rows are tested (which would mirror the implementation). + # Setting a fixed number would make the test very specific for the given + # number of groups and DataFrame length. + # Correctness of output is tested in test_groupby_repr for k, (group_name, df_group) in enumerate(df_groupby): dtype = df_group.iloc[0].dtype tm.assert_series_equal( @@ -2133,6 +2138,11 @@ def test_groupby_repr_not_all_groups(): dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) # Test first and last group + # Those groups will always be shown. 
No logic is needed to calculate how + # many groups are tested (which would mirror the implementation). + # Setting a fixed number would make the test very specific for the given + # number of groups and DataFrame length. + # Correctness of output is tested in test_groupby_repr tm.assert_frame_equal( dfs_from_html[0], df_groupby.get_group(tuple(df_groupby.groups.keys())[0]) ) From edff21dd6752a7ce6d549de0de3e841e496fc13d Mon Sep 17 00:00:00 2001 From: JulianW Date: Wed, 11 Nov 2020 07:56:44 +0100 Subject: [PATCH 23/30] Test more rows in groupby repr when truncated --- pandas/tests/groupby/test_groupby.py | 36 +++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 235236f197e33..937b33b12491e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2083,16 +2083,15 @@ def test_groupby_repr(): @td.skip_if_no("lxml") -def test_groupby_repr_truncated_group(): +@pytest.mark.parametrize("n_groups,n_rows,check_n_rows", [(4, 400, 7)]) +def test_groupby_repr_truncated_group(n_groups, n_rows, check_n_rows): # GH 34926 - In the groups not all rows are shown in html output. 
- n_groups = 10 - length = n_groups * 20 df = DataFrame( { - "A": range(length), - "B": range(0, length * 2, 2), - "C": list(range(n_groups)) * (length // n_groups), + "A": range(n_rows), + "B": range(0, n_rows * 2, 2), + "C": list(range(n_groups)) * (n_rows // n_groups), } ) @@ -2100,22 +2099,21 @@ def test_groupby_repr_truncated_group(): html_groupby = df_groupby._repr_html_() dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) + # Filter out row with dots dots displaying hidden DataFrames + dfs_from_html = [ + df_from_html[df_from_html.index != "..."] for df_from_html in dfs_from_html + ] + for k in range(len(dfs_from_html)): + # Convert to int (orginal dtype) + dfs_from_html[k] = dfs_from_html[k].astype(int) + dfs_from_html[k].index = dfs_from_html[k].index.astype(int) - # For each group only test first and last row - # Those rows will always be shown. No logic is needed to calculate how many - # rows are tested (which would mirror the implementation). - # Setting a fixed number would make the test very specific for the given - # number of groups and DataFrame length. 
- # Correctness of output is tested in test_groupby_repr for k, (group_name, df_group) in enumerate(df_groupby): - dtype = df_group.iloc[0].dtype - tm.assert_series_equal( - dfs_from_html[k].iloc[0].astype(dtype), df_group.iloc[0], check_names=False + tm.assert_frame_equal( + dfs_from_html[k].iloc[:check_n_rows], df_group.iloc[:check_n_rows], ) - tm.assert_series_equal( - dfs_from_html[k].iloc[-1].astype(dtype), - df_group.iloc[-1], - check_names=False, + tm.assert_frame_equal( + dfs_from_html[k].iloc[-check_n_rows:], df_group.iloc[-check_n_rows:], ) From 580d09bf2f989686659cc8d45e106b363b23bd60 Mon Sep 17 00:00:00 2001 From: JulianW Date: Wed, 11 Nov 2020 08:09:31 +0100 Subject: [PATCH 24/30] Test more groups in groupby repr when truncated --- pandas/tests/groupby/test_groupby.py | 31 ++++++++++++---------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 937b33b12491e..7a368d59b83ec 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2118,15 +2118,14 @@ def test_groupby_repr_truncated_group(n_groups, n_rows, check_n_rows): @td.skip_if_no("lxml") -def test_groupby_repr_not_all_groups(): +@pytest.mark.parametrize("n_groups,n_rows,check_n_groups", [(30, 150, 5)]) +def test_groupby_repr_not_all_groups(n_groups, n_rows, check_n_groups): # GH 34926 - Not all groups are shown in html output. - n_groups = 30 - length = n_groups * 5 df = DataFrame( { - "A": range(length), - "B": range(0, length * 2, 2), - "C": list(range(n_groups)) * (length // n_groups), + "A": range(n_rows), + "B": range(0, n_rows * 2, 2), + "C": list(range(n_groups)) * (n_rows // n_groups), } ) @@ -2135,18 +2134,14 @@ def test_groupby_repr_not_all_groups(): dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) - # Test first and last group - # Those groups will always be shown. 
No logic is needed to calculate how - # many groups are tested (which would mirror the implementation). - # Setting a fixed number would make the test very specific for the given - # number of groups and DataFrame length. - # Correctness of output is tested in test_groupby_repr - tm.assert_frame_equal( - dfs_from_html[0], df_groupby.get_group(tuple(df_groupby.groups.keys())[0]) - ) - tm.assert_frame_equal( - dfs_from_html[-1], df_groupby.get_group(tuple(df_groupby.groups.keys())[-1]) - ) + for k in range(check_n_groups): + tm.assert_frame_equal( + dfs_from_html[k], df_groupby.get_group(tuple(df_groupby.groups.keys())[k]) + ) + tm.assert_frame_equal( + dfs_from_html[-k - 1], + df_groupby.get_group(tuple(df_groupby.groups.keys())[-k - 1]), + ) def test_group_on_two_row_multiindex_returns_one_tuple_key(): From 0c948e1c6280172114d0020028c8f6976dcc26e9 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 3 Jan 2021 14:58:55 +0100 Subject: [PATCH 25/30] Refactor groups repr html - combine all previous test cases into one parametrized test case --- pandas/tests/groupby/test_groupby.py | 93 ++++++++-------------------- 1 file changed, 26 insertions(+), 67 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7a368d59b83ec..7d7b9be012160 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2061,32 +2061,17 @@ def test_groups_repr_truncates(max_seq_items, expected): @td.skip_if_no("lxml") -def test_groupby_repr(): - # GH 34926 - All groups and all rows in a group are shown in html output. 
- n_groups = 5 - length = n_groups * 5 - df = DataFrame( - { - "A": range(length), - "B": range(0, length * 2, 2), - "C": list(range(n_groups)) * (length // n_groups), - } - ) - - df_groupby = df.groupby("C") - html_groupby = df_groupby._repr_html_() - - dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) - - for k, (group_name, df_group) in enumerate(df_groupby): - tm.assert_frame_equal(dfs_from_html[k], df_group) - - -@td.skip_if_no("lxml") -@pytest.mark.parametrize("n_groups,n_rows,check_n_rows", [(4, 400, 7)]) -def test_groupby_repr_truncated_group(n_groups, n_rows, check_n_rows): - # GH 34926 - In the groups not all rows are shown in html output. - +@pytest.mark.parametrize( + "n_groups,n_rows,check_n_groups,check_n_rows", + [ + (10, 60, 5, 3), # All groups and all rows in the groups are shown + (25, 100, 5, 2), # Not all groups are shown + (4, 400, 2, 7), # Not all rows are shown in the groups + (20, 400, 5, 3), # Not all groups and not all rows in the groups are shown + ], +) +def test_groupby_repr_not_all_groups(n_groups, n_rows, check_n_groups, check_n_rows): + # GH 34926 df = DataFrame( { "A": range(n_rows), @@ -2098,50 +2083,24 @@ def test_groupby_repr_truncated_group(n_groups, n_rows, check_n_rows): df_groupby = df.groupby("C") html_groupby = df_groupby._repr_html_() - dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) - # Filter out row with dots dots displaying hidden DataFrames - dfs_from_html = [ - df_from_html[df_from_html.index != "..."] for df_from_html in dfs_from_html - ] - for k in range(len(dfs_from_html)): - # Convert to int (orginal dtype) - dfs_from_html[k] = dfs_from_html[k].astype(int) - dfs_from_html[k].index = dfs_from_html[k].index.astype(int) - - for k, (group_name, df_group) in enumerate(df_groupby): - tm.assert_frame_equal( - dfs_from_html[k].iloc[:check_n_rows], df_group.iloc[:check_n_rows], - ) - tm.assert_frame_equal( - dfs_from_html[k].iloc[-check_n_rows:], df_group.iloc[-check_n_rows:], - ) + 
df_from_html = pd.concat(pd.read_html(StringIO(html_groupby), index_col=0)) + # Drop "..." rows and convert index and data to int + df_from_html = df_from_html[df_from_html.index != "..."].astype(int) + df_from_html.index = df_from_html.index.astype(int) -@td.skip_if_no("lxml") -@pytest.mark.parametrize("n_groups,n_rows,check_n_groups", [(30, 150, 5)]) -def test_groupby_repr_not_all_groups(n_groups, n_rows, check_n_groups): - # GH 34926 - Not all groups are shown in html output. - df = DataFrame( - { - "A": range(n_rows), - "B": range(0, n_rows * 2, 2), - "C": list(range(n_groups)) * (n_rows // n_groups), - } + # Iterate over the first and last "check_n_groups" groups + df_group_iter = ( + list(df_groupby)[:check_n_groups] + list(df_groupby)[-check_n_groups:] ) - - df_groupby = df.groupby("C") - html_groupby = df_groupby._repr_html_() - - dfs_from_html = pd.read_html(StringIO(html_groupby), index_col=0) - - for k in range(check_n_groups): - tm.assert_frame_equal( - dfs_from_html[k], df_groupby.get_group(tuple(df_groupby.groups.keys())[k]) - ) - tm.assert_frame_equal( - dfs_from_html[-k - 1], - df_groupby.get_group(tuple(df_groupby.groups.keys())[-k - 1]), - ) + for group_name, df_group in df_group_iter: + # Iterate over the first and last "check_n_rows" of every group + df_iter = pd.concat( + [df_group.iloc[:check_n_rows], df_group.iloc[-check_n_rows:]] + ).iterrows() + for index, row in df_iter: + print(group_name, index) + tm.assert_series_equal(row, df_from_html.loc[index]) def test_group_on_two_row_multiindex_returns_one_tuple_key(): From ae8721d3ead9455dcf05bb56baddb7a6eccfe9ef Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 3 Jan 2021 18:02:55 +0100 Subject: [PATCH 26/30] Add whatsnew entry for group-by HTML representation --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 136d1c87a8498..b84b945bc9d5e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ 
b/doc/source/whatsnew/v1.3.0.rst @@ -51,6 +51,7 @@ Other enhancements - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) +- Added HTML representation for grouped DataFrame and Series (:issue:`34926`) .. --------------------------------------------------------------------------- From 1c92ed8ef9867248862c887cf8a1b7a004888b98 Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 3 Jan 2021 18:07:19 +0100 Subject: [PATCH 27/30] Fix test case name --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 5956a98db10dd..881482fb264a2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2093,7 +2093,7 @@ def test_groups_repr_truncates(max_seq_items, expected): (20, 400, 5, 3), # Not all groups and not all rows in the groups are shown ], ) -def test_groupby_repr_not_all_groups(n_groups, n_rows, check_n_groups, check_n_rows): +def test_groupby_repr(n_groups, n_rows, check_n_groups, check_n_rows): # GH 34926 df = DataFrame( { From b92d61f5682bc12dd2eb3474c47941fcf419d88a Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 3 Jan 2021 18:36:47 +0100 Subject: [PATCH 28/30] Rename groupby objects --- pandas/tests/groupby/test_groupby.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 881482fb264a2..a31106b7ee6d1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2103,26 +2103,22 @@ def test_groupby_repr(n_groups, n_rows, check_n_groups, check_n_rows): 
} ) - df_groupby = df.groupby("C") - html_groupby = df_groupby._repr_html_() + gb = df.groupby("C") - df_from_html = pd.concat(pd.read_html(StringIO(html_groupby), index_col=0)) + df_from_html = pd.concat(pd.read_html(StringIO(gb._repr_html_()), index_col=0)) # Drop "..." rows and convert index and data to int df_from_html = df_from_html[df_from_html.index != "..."].astype(int) df_from_html.index = df_from_html.index.astype(int) # Iterate over the first and last "check_n_groups" groups - df_group_iter = ( - list(df_groupby)[:check_n_groups] + list(df_groupby)[-check_n_groups:] - ) - for group_name, df_group in df_group_iter: + gb_iter = list(gb)[:check_n_groups] + list(gb)[-check_n_groups:] + for group_name, df_group in gb_iter: # Iterate over the first and last "check_n_rows" of every group df_iter = pd.concat( [df_group.iloc[:check_n_rows], df_group.iloc[-check_n_rows:]] ).iterrows() for index, row in df_iter: - print(group_name, index) tm.assert_series_equal(row, df_from_html.loc[index]) From 579998adeb269ab43c6481557105183fa40941be Mon Sep 17 00:00:00 2001 From: JulianW Date: Sun, 3 Jan 2021 18:39:07 +0100 Subject: [PATCH 29/30] Add case for single and tuple groupby key --- pandas/io/formats/format.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2170bcf9b8e56..ffe375ba64738 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2073,7 +2073,10 @@ def repr_html_groupby(group_obj) -> str: group_names = group_names[:n_start] + group_names[-n_end:] repr_html_list = list() for group_name in group_names: - group = group_obj.get_group(group_name) + if not isinstance(group_name, tuple): + group = group_obj.get_group((group_name, )) + else: + group = group_obj.get_group(group_name) if not hasattr(group, "to_html"): group = group.to_frame() repr_html_list.append( From 7a11be888a02d1d5689dfd33fbc56071bc7e8c69 Mon Sep 17 00:00:00 2001 From: JulianWgs 
<31596773+JulianWgs@users.noreply.github.com> Date: Sun, 4 Jul 2021 11:10:19 +0200 Subject: [PATCH 30/30] Move whats new to 1.4.0 release --- doc/source/whatsnew/v1.3.0.rst | 1 - doc/source/whatsnew/v1.4.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1d85cbe8a4671..ed66861efad93 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -250,7 +250,6 @@ Other enhancements - :func:`read_csv` now raising ``ParserWarning`` if length of header or given names does not match length of data when ``usecols`` is not specified (:issue:`21768`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) -- Added HTML representation for grouped DataFrame and Series (:issue:`34926`) - Added support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`) - :func:`read_excel` can now auto-detect .xlsb files and older .xls files (:issue:`35416`, :issue:`41225`) - :class:`ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behavior of append mode when writing to existing sheets (:issue:`40230`) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 24f307f23f435..75ef2b820290b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- Added HTML representation for grouped DataFrame and Series (:issue:`34926`) - -