From 0767a83fb5230dd9ab22e77c448ec95fcd05b5b7 Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:09:00 +0000 Subject: [PATCH 1/8] Add option to only merge column header cells in `ExcelFormatter`. --- pandas/_typing.py | 1 + pandas/io/formats/excel.py | 23 ++++++++++++++++------- pandas/io/formats/style.py | 3 ++- pandas/tests/io/excel/test_writers.py | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index d90596878ba51..09a3f58d6ab7f 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -510,6 +510,7 @@ def closed(self) -> bool: # ExcelWriter ExcelWriterIfSheetExists = Literal["error", "new", "replace", "overlay"] +ExcelWriterMergeCells = Union[bool, Literal["columns"]] # Offsets OffsetCalendar = Union[np.busdaycalendar, "AbstractHolidayCalendar"] diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index a98d9c175c2bd..7b968343b3b94 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -52,6 +52,7 @@ if TYPE_CHECKING: from pandas._typing import ( + ExcelWriterMergeCells, FilePath, IndexLabel, StorageOptions, @@ -523,8 +524,11 @@ class ExcelFormatter: Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. - merge_cells : bool, default False - Format MultiIndex and Hierarchical Rows as merged cells. + merge_cells : bool or 'columns', default False + Format MultiIndex column headers and Hierarchical Rows as mergedcells + if True. Merge MultiIndex column headers only if 'columns'. + .. versionchanged:: :ref: 3.0.0 + Added the 'columns' option. inf_rep : str, default `'inf'` representation for np.inf values (which aren't representable in Excel) A `'-'` sign will be added in front of -inf. 
@@ -547,7 +551,7 @@ def __init__( header: Sequence[Hashable] | bool = True, index: bool = True, index_label: IndexLabel | None = None, - merge_cells: bool = False, + merge_cells: ExcelWriterMergeCells = False, inf_rep: str = "inf", style_converter: Callable | None = None, ) -> None: @@ -580,6 +584,9 @@ def __init__( self.index = index self.index_label = index_label self.header = header + + if (not isinstance(merge_cells, bool)) and (merge_cells != "columns"): + raise ValueError("Unexpected value for 'merge_cells'.") self.merge_cells = merge_cells self.inf_rep = inf_rep @@ -614,7 +621,7 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: columns = self.columns level_strs = columns._format_multi( - sparsify=self.merge_cells, include_names=False + sparsify=self.merge_cells in {True, "columns"}, include_names=False ) level_lengths = get_level_lengths(level_strs) coloffset = 0 @@ -623,7 +630,7 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: if self.index and isinstance(self.df.index, MultiIndex): coloffset = self.df.index.nlevels - 1 - if self.merge_cells: + if self.merge_cells in {True, "columns"}: # Format multi-index as a merged cells. for lnum, name in enumerate(columns.names): yield ExcelCell( @@ -793,7 +800,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: # with index names (blank if None) for # unambiguous round-trip, unless not merging, # in which case the names all go on one row Issue #11328 - if isinstance(self.columns, MultiIndex) and self.merge_cells: + if isinstance(self.columns, MultiIndex) and ( + self.merge_cells in {True, "columns"} + ): self.rowcounter += 1 # if index labels are not empty go ahead and dump @@ -801,7 +810,7 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: for cidx, name in enumerate(index_labels): yield ExcelCell(self.rowcounter - 1, cidx, name, None) - if self.merge_cells: + if self.merge_cells and self.merge_cells != "columns": # Format hierarchical rows as merged cells. 
level_strs = self.df.index._format_multi( sparsify=True, include_names=False diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index a695c539977b3..6f4c2fa6c6eae 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -66,6 +66,7 @@ Axis, AxisInt, Concatenate, + ExcelWriterMergeCells, FilePath, IndexLabel, IntervalClosedType, @@ -551,7 +552,7 @@ def to_excel( startrow: int = 0, startcol: int = 0, engine: str | None = None, - merge_cells: bool = True, + merge_cells: ExcelWriterMergeCells = True, encoding: str | None = None, inf_rep: str = "inf", verbose: bool = True, diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ad1f22224bc0d..482b331332462 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -49,7 +49,7 @@ def frame(float_frame): return float_frame[:10] -@pytest.fixture(params=[True, False]) +@pytest.fixture(params=[True, False, "columns"]) def merge_cells(request): return request.param From adcde250b8b80fbd9c5240ea6e0c81726463ad8f Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:18:37 +0000 Subject: [PATCH 2/8] Add entry in the whatsnew document --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3d869bf31f372..a1811da0a8fc5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -45,6 +45,7 @@ Other enhancements - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) +- Add option to only 
merge MultiIndex column header header cells in :func:`DataFrame.to_excel` (:issue:`35384`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) From cdf2714fdbad352e70ef6d9bf02111608a178618 Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 21:59:59 +0100 Subject: [PATCH 3/8] Remove erroneous `:ref:` from docstring Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/formats/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 7b968343b3b94..ba2952b74682f 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -527,7 +527,7 @@ class ExcelFormatter: merge_cells : bool or 'columns', default False Format MultiIndex column headers and Hierarchical Rows as mergedcells if True. Merge MultiIndex column headers only if 'columns'. - .. versionchanged:: :ref: 3.0.0 + .. versionchanged:: 3.0.0 Added the 'columns' option. inf_rep : str, default `'inf'` representation for np.inf values (which aren't representable in Excel) From d6a302d68bb2b3538ba4677b1b6065bd9a9dba3b Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:00:23 +0100 Subject: [PATCH 4/8] Correct typo in docstring Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/formats/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index ba2952b74682f..28015d12326fd 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -525,7 +525,7 @@ class ExcelFormatter: `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. 
merge_cells : bool or 'columns', default False - Format MultiIndex column headers and Hierarchical Rows as mergedcells + Format MultiIndex column headers and Hierarchical Rows as merged cells if True. Merge MultiIndex column headers only if 'columns'. .. versionchanged:: 3.0.0 Added the 'columns' option. From e3dd46d5f9cb445f6597c5c9ebb1029071e14ba0 Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:01:38 +0100 Subject: [PATCH 5/8] Remove superfluous parentheses from if statement; better error message Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/formats/excel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 28015d12326fd..5843ce336d698 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -585,8 +585,8 @@ def __init__( self.index_label = index_label self.header = header - if (not isinstance(merge_cells, bool)) and (merge_cells != "columns"): - raise ValueError("Unexpected value for 'merge_cells'.") + if not isinstance(merge_cells, bool) and merge_cells != "columns": + raise ValueError(f"Unexpected value for {merge_cells=}.) self.merge_cells = merge_cells self.inf_rep = inf_rep From 4d8c08bbfd6c51d2978423501eff7bf021132c25 Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Mon, 24 Jun 2024 21:43:42 +0000 Subject: [PATCH 6/8] Fix missing double quote. --- pandas/io/formats/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 5843ce336d698..52b5755558900 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -586,7 +586,7 @@ def __init__( self.header = header if not isinstance(merge_cells, bool) and merge_cells != "columns": - raise ValueError(f"Unexpected value for {merge_cells=}.) 
+ raise ValueError(f"Unexpected value for {merge_cells=}.") self.merge_cells = merge_cells self.inf_rep = inf_rep From 4fd6c921c5531c45e62dfe477a50543f636c4689 Mon Sep 17 00:00:00 2001 From: Ulrich Dobramysl <1979498+ulido@users.noreply.github.com> Date: Tue, 25 Jun 2024 06:38:54 +0100 Subject: [PATCH 7/8] Wording of whatsnew entry Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a1811da0a8fc5..d11916d05153b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -45,7 +45,7 @@ Other enhancements - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) -- Add option to only merge MultiIndex column header header cells in :func:`DataFrame.to_excel` (:issue:`35384`) +- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) From 1d07ec39d73588eb1921f551173536b243c5058e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jun 2024 17:08:42 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d11916d05153b..249104e45ec1e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -39,13 +39,13 @@ Other enhancements - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`) - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) -- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support reading Stata 110-format 
(Stata 7) dta files (:issue:`47176`)