From 4f9e8e189cab32c8261c5df84fa5f9a2353d57fc Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 28 Jan 2023 22:18:34 +0100 Subject: [PATCH 1/4] ENH: add escape math mode with escape=latex-math --- pandas/io/formats/style_render.py | 60 ++++++++++++++++++-- pandas/tests/io/formats/style/test_format.py | 2 +- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 5264342661b3f..b15e54c4b6f96 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -989,6 +989,8 @@ def format( Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. + Use 'latex-math' to replace the characters the same way as in 'latex' mode, + except for math substrings, which start and end with ``$``. Escaping is done before ``formatter``. .. versionadded:: 1.3.0 @@ -1105,16 +1107,28 @@ def format( NA ... - Using a ``formatter`` with LaTeX ``escape``. + Using a ``formatter`` with ``escape`` in 'latex' mode. - >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) + >>> df = pd.DataFrame([["123"], ["~ ^"], ["%#"]]) >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex() ... # doctest: +SKIP \begin{tabular}{ll} - {} & {0} \\ + & 0 \\ 0 & \textbf{123} \\ 1 & \textbf{\textasciitilde \space \textasciicircum } \\ - 2 & \textbf{\$\%\#} \\ + 2 & \textbf{\%\#} \\ + \end{tabular} + + Using ``escape`` in 'latex-math' mode. + + >>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \ + ... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]]) + >>> df.style.format(escape="latex-math").to_latex() + ... 
# doctest: +SKIP + \begin{tabular}{ll} + & 0 \\ + 0 & $\sum_{i=1}^{10} a_i$ a\textasciitilde b $\alpha = \frac{\beta}{\zeta^2}$ \\ + 1 & \%\#\textasciicircum \space $ \$x^2 $ \\ \end{tabular} Pandas defines a `number-format` pseudo CSS attribute instead of the `.format` @@ -1743,9 +1757,12 @@ def _str_escape(x, escape): return escape_html(x) elif escape == "latex": return _escape_latex(x) + elif escape == "latex-math": + return _escape_latex_math(x) else: raise ValueError( - f"`escape` only permitted in {{'html', 'latex'}}, got {escape}" + f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ +got {escape}" ) return x @@ -2344,3 +2361,36 @@ def _escape_latex(s): .replace("^", "\\textasciicircum ") .replace("ab2§=§8yz", "\\textbackslash ") ) + + +def _escape_latex_math(s): + r""" + All characters between two characters ``$`` are preserved. + + The substrings in LaTeX math mode, which start with the character ``$`` + and end with ``$``, are preserved without escaping. Otherwise + regular LaTeX escaping applies. See ``_escape_latex()``. 
+ + Parameters + ---------- + s : str + Input to be escaped + + Returns + ------- + str : + Escaped string + """ + s = s.replace(r"\$", r"ab2§=§8yz") + pattern = re.compile(r"\$.*?\$") + pos = 0 + ps = pattern.search(s, pos) + res = [] + while ps: + res.append(_escape_latex(s[pos : ps.span()[0]])) + res.append(ps.group()) + pos = ps.span()[1] + ps = pattern.search(s, pos) + + res.append(_escape_latex(s[pos : len(s)])) + return "".join(res).replace(r"ab2§=§8yz", r"\$") diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 0b114ea128b0b..4e6d9d0eb881b 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -359,7 +359,7 @@ def test_format_decimal(formatter, thousands, precision, func, col): def test_str_escape_error(): - msg = "`escape` only permitted in {'html', 'latex'}, got " + msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got " with pytest.raises(ValueError, match=msg): _str_escape("text", "bad_escape") From 43800a41993f4af8b209e58964b188d5045faf41 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 29 Jan 2023 12:42:13 +0100 Subject: [PATCH 2/4] ENH: add test for escape=latex-math --- pandas/core/config_init.py | 2 +- pandas/tests/io/formats/style/test_format.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 2e1ddb3c0a628..79ad510883911 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -925,7 +925,7 @@ def register_converter_cb(key) -> None: "format.escape", None, styler_escape, - validator=is_one_of_factory([None, "html", "latex"]), + validator=is_one_of_factory([None, "html", "latex", "latex-math"]), ) cf.register_option( diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 4e6d9d0eb881b..0dec614970467 100644 --- 
a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -192,6 +192,15 @@ def test_format_escape_html(escape, exp): assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&" +def test_format_escape_latex_math(): + chars = r"$\frac{1}{2} \$ x^2$ ~%#^" + df = DataFrame([[chars]]) + + expected = r"$\frac{1}{2} \$ x^2$ \textasciitilde \%\#\textasciicircum " + s = df.style.format("{0}", escape="latex-math") + assert expected == s._translate(True, True)["body"][0][1]["display_value"] + + def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) @@ -403,6 +412,9 @@ def test_format_options(): with option_context("styler.format.escape", "latex"): ctx_with_op = df.style._translate(True, True) assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " + with option_context("styler.format.escape", "latex-math"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " # test option: formatter with option_context("styler.format.formatter", {"int": "{:,.2f}"}): From 2fd6d7f38da9bb0be4b857d8cd62ee820ef1d856 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 31 Jan 2023 18:05:34 +0100 Subject: [PATCH 3/4] change uuid string and revert the example for latex --- pandas/io/formats/style_render.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index b15e54c4b6f96..d15a22d3a5b61 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1109,14 +1109,14 @@ def format( Using a ``formatter`` with ``escape`` in 'latex' mode. - >>> df = pd.DataFrame([["123"], ["~ ^"], ["%#"]]) + >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex() ... 
# doctest: +SKIP \begin{tabular}{ll} & 0 \\ 0 & \textbf{123} \\ 1 & \textbf{\textasciitilde \space \textasciicircum } \\ - 2 & \textbf{\%\#} \\ + 2 & \textbf{\$\%\#} \\ \end{tabular} Using ``escape`` in 'latex-math' mode. @@ -2381,7 +2381,7 @@ def _escape_latex_math(s): str : Escaped string """ - s = s.replace(r"\$", r"ab2§=§8yz") + s = s.replace(r"\$", r"rt8§=§7wz") pattern = re.compile(r"\$.*?\$") pos = 0 ps = pattern.search(s, pos) @@ -2393,4 +2393,4 @@ def _escape_latex_math(s): ps = pattern.search(s, pos) res.append(_escape_latex(s[pos : len(s)])) - return "".join(res).replace(r"ab2§=§8yz", r"\$") + return "".join(res).replace(r"rt8§=§7wz", r"\$") From f80a1c482dd18d862544a6c63dd7fc45a34e5fc9 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 11 Feb 2023 15:36:02 +0100 Subject: [PATCH 4/4] add one line to whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c1d9b2744b27e..39cd67305bcbe 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -187,6 +187,7 @@ Other enhancements - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`) - Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`) +- Added new escape mode "latex-math" for the ``escape`` argument of :meth:`.Styler.format`, which applies LaTeX escaping except inside math substrings delimited by "$" (:issue:`50040`) - .. ---------------------------------------------------------------------------