diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index a2d989e787e0f..a88c4374b7030 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -47,6 +47,25 @@ def time_write_excel(self, engine): writer.save() +class WriteExcelStyled: + params = ["openpyxl", "xlsxwriter"] + param_names = ["engine"] + + def setup(self, engine): + self.df = _generate_dataframe() + + def time_write_excel_style(self, engine): + bio = BytesIO() + bio.seek(0) + writer = ExcelWriter(bio, engine=engine) + df_style = self.df.style + df_style.applymap(lambda x: "border: red 1px solid;") + df_style.applymap(lambda x: "color: blue") + df_style.applymap(lambda x: "border-color: green black", subset=["float1"]) + df_style.to_excel(writer, sheet_name="Sheet1") + writer.save() + + class ReadExcel: params = ["xlrd", "openpyxl", "odf"] diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6cc0190f00e31..a4fa31f7fc368 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -752,6 +752,7 @@ Performance improvements - Performance improvement in :func:`factorize` (:issue:`46109`) - Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`) - Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`) +- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`) - Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index a6d2645590dde..92dafffc9c3de 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -7,6 +7,7 @@ from typing import ( Callable, Generator, + Iterable, Iterator, ) import warnings @@ -188,9 +189,24 @@ class CSSResolver: SIDES = ("top", "right", "bottom", "left") + CSS_EXPANSIONS = { + **{ + "-".join(["border", prop] if prop else ["border"]): _border_expander(prop) + for prop in ["", "top", "right", "bottom", "left"] + }, + **{ + "-".join(["border", prop]): _side_expander("border-{:s}-" + prop) + for prop in ["color", "style", "width"] + }, + **{ + "margin": _side_expander("margin-{:s}"), + "padding": _side_expander("padding-{:s}"), + }, + } + def __call__( self, - declarations_str: str, + declarations: str | Iterable[tuple[str, str]], inherited: dict[str, str] | None = None, ) -> dict[str, str]: """ @@ -198,8 +214,10 @@ def __call__( Parameters ---------- - declarations_str : str - A list of CSS declarations + declarations : str | Iterable[tuple[str, str]] + A CSS string or set of CSS declaration tuples + e.g. "font-weight: bold; background: blue" or + {("font-weight", "bold"), ("background", "blue")} inherited : dict, optional Atomic properties indicating the inherited style context in which declarations_str is to be resolved. 
``inherited`` should already @@ -230,7 +248,9 @@ def __call__( ('font-size', '24pt'), ('font-weight', 'bold')] """ - props = dict(self.atomize(self.parse(declarations_str))) + if isinstance(declarations, str): + declarations = self.parse(declarations) + props = dict(self.atomize(declarations)) if inherited is None: inherited = {} @@ -347,28 +367,15 @@ def _error(): size_fmt = f"{val:f}pt" return size_fmt - def atomize(self, declarations) -> Generator[tuple[str, str], None, None]: + def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]: for prop, value in declarations: - attr = "expand_" + prop.replace("-", "_") - try: - expand = getattr(self, attr) - except AttributeError: - yield prop, value + prop = prop.lower() + value = value.lower() + if prop in self.CSS_EXPANSIONS: + expand = self.CSS_EXPANSIONS[prop] + yield from expand(self, prop, value) else: - for prop, value in expand(prop, value): - yield prop, value - - expand_border = _border_expander() - expand_border_top = _border_expander("top") - expand_border_right = _border_expander("right") - expand_border_bottom = _border_expander("bottom") - expand_border_left = _border_expander("left") - - expand_border_color = _side_expander("border-{:s}-color") - expand_border_style = _side_expander("border-{:s}-style") - expand_border_width = _side_expander("border-{:s}-width") - expand_margin = _side_expander("margin-{:s}") - expand_padding = _side_expander("padding-{:s}") + yield prop, value def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]: """ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 8478b72d97a5e..811b079c3c693 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -3,7 +3,10 @@ """ from __future__ import annotations -from functools import reduce +from functools import ( + lru_cache, + reduce, +) import itertools import re from typing import ( @@ -85,10 +88,13 @@ def __init__( **kwargs, ) -> None: if css_styles and 
css_converter: - css = ";".join( - [a + ":" + str(v) for (a, v) in css_styles[css_row, css_col]] - ) - style = css_converter(css) + # Use dict to get only one (case-insensitive) declaration per property + declaration_dict = { + prop.lower(): val for prop, val in css_styles[css_row, css_col] + } + # Convert to frozenset for order-invariant caching + unique_declarations = frozenset(declaration_dict.items()) + style = css_converter(unique_declarations) return super().__init__(row=row, col=col, val=val, style=style, **kwargs) @@ -166,15 +172,19 @@ def __init__(self, inherited: str | None = None) -> None: compute_css = CSSResolver() - def __call__(self, declarations_str: str) -> dict[str, dict[str, str]]: + @lru_cache(maxsize=None) + def __call__( + self, declarations: str | frozenset[tuple[str, str]] + ) -> dict[str, dict[str, str]]: """ Convert CSS declarations to ExcelWriter style. Parameters ---------- - declarations_str : str - List of CSS declarations. - e.g. "font-weight: bold; background: blue" + declarations : str | frozenset[tuple[str, str]] + CSS string or set of CSS declaration tuples. + e.g. "font-weight: bold; background: blue" or + {("font-weight", "bold"), ("background", "blue")} Returns ------- @@ -182,8 +192,7 @@ def __call__(self, declarations_str: str) -> dict[str, dict[str, str]]: A style as interpreted by ExcelWriter when found in ExcelCell.style. """ - # TODO: memoize? 
- properties = self.compute_css(declarations_str, self.inherited) + properties = self.compute_css(declarations, self.inherited) return self.build_xlstyle(properties) def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]: diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index b95a5b4365f43..b98fd74643207 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -11,7 +11,10 @@ import pandas._testing as tm from pandas.io.formats.css import CSSWarning -from pandas.io.formats.excel import CSSToExcelConverter +from pandas.io.formats.excel import ( + CssExcelCell, + CSSToExcelConverter, +) @pytest.mark.parametrize( @@ -340,3 +343,89 @@ def test_css_named_colors_from_mpl_present(): pd_colors = CSSToExcelConverter.NAMED_COLORS for name, color in mpl_colors.items(): assert name in pd_colors and pd_colors[name] == color[1:] + + +@pytest.mark.parametrize( + "styles,expected", + [ + ([("color", "green"), ("color", "red")], "color: red;"), + ([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"), + ([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"), + ], +) +def test_css_excel_cell_precedence(styles, expected): + """It favors latter declarations over former declarations""" + # See GH 47371 + converter = CSSToExcelConverter() + converter.__call__.cache_clear() + css_styles = {(0, 0): styles} + cell = CssExcelCell( + row=0, + col=0, + val="", + style=None, + css_styles=css_styles, + css_row=0, + css_col=0, + css_converter=converter, + ) + converter.__call__.cache_clear() + + assert cell.style == converter(expected) + + +@pytest.mark.parametrize( + "styles,cache_hits,cache_misses", + [ + ([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1), + ( + [ + [("font-weight", "bold")], + [("font-weight", "normal"), ("font-weight", "bold")], + ], + 1, + 1, + ), + ([[("text-align", "center")], 
[("TEXT-ALIGN", "center")]], 1, 1), + ( + [ + [("font-weight", "bold"), ("text-align", "center")], + [("font-weight", "bold"), ("text-align", "left")], + ], + 0, + 2, + ), + ( + [ + [("font-weight", "bold"), ("text-align", "center")], + [("font-weight", "bold"), ("text-align", "left")], + [("font-weight", "bold"), ("text-align", "center")], + ], + 1, + 2, + ), + ], +) +def test_css_excel_cell_cache(styles, cache_hits, cache_misses): + """It caches unique cell styles""" + # See GH 47371 + converter = CSSToExcelConverter() + converter.__call__.cache_clear() + + css_styles = {(0, i): _style for i, _style in enumerate(styles)} + for css_row, css_col in css_styles: + CssExcelCell( + row=0, + col=0, + val="", + style=None, + css_styles=css_styles, + css_row=css_row, + css_col=css_col, + css_converter=converter, + ) + cache_info = converter.__call__.cache_info() + converter.__call__.cache_clear() + + assert cache_info.hits == cache_hits + assert cache_info.misses == cache_misses