Skip to content

Commit 16d9159

Browse files
Escape and unescape hash (#), single-quote, and double-quote characters in column names for DataFrame.query
1 parent 9c8c685 commit 16d9159

File tree

2 files changed

+90
-9
lines changed

2 files changed

+90
-9
lines changed

pandas/core/frame.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
cast,
3535
overload,
3636
)
37+
import urllib.parse
3738
import warnings
3839

3940
import numpy as np
@@ -4559,14 +4560,8 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
45594560
For other characters that fall outside the ASCII range (U+0001..U+007F)
45604561
and those that are not further specified in PEP 3131,
45614562
the query parser will raise an error.
4562-
This excludes whitespace different than the space character,
4563-
but also the hashtag (as it is used for comments) and the backtick
4564-
itself (backtick can also not be escaped).
4565-
4566-
In a special case, quotes that make a pair around a backtick can
4567-
confuse the parser.
4568-
For example, ```it's` > `that's``` will raise an error,
4569-
as it forms a quoted string (``'s > `that'``) with a backtick inside.
4563+
This excludes whitespace other than the space character
4564+
and the backtick itself (backtick cannot be escaped).
45704565
45714566
See also the `Python documentation about lexical analysis
45724567
<https://docs.python.org/3/reference/lexical_analysis.html>`__
@@ -4620,7 +4615,35 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
46204615
raise ValueError(msg)
46214616
kwargs["level"] = kwargs.pop("level", 0) + 1
46224617
kwargs["target"] = None
4623-
res = self.eval(expr, **kwargs)
4618+
4619+
# GH 59285
4620+
if any(("#" in col) or ("'" in col) or ('"' in col) for col in self.columns):
4621+
# Create a copy of `self` with column names escaped
4622+
escaped_self = self.copy()
4623+
escaped_self.columns = [
4624+
urllib.parse.quote(col) for col in escaped_self.columns
4625+
]
4626+
4627+
# In expr, escape column names between backticks
4628+
column_name_to_escaped_name = {
4629+
col: urllib.parse.quote(col) for col in self.columns
4630+
}
4631+
escaped_expr = "`".join(
4632+
(column_name_to_escaped_name.get(token, token) if (i % 2) else token)
4633+
for i, token in enumerate(expr.split("`"))
4634+
)
4635+
4636+
# Evaluate the escaped expression against the escaped copy
4637+
escaped_res = escaped_self.eval(escaped_expr, **kwargs)
4638+
4639+
# If `res` is a Series or DataFrame, unescape names
4640+
res = escaped_res.copy()
4641+
if isinstance(res, Series) and res.name:
4642+
res.name = urllib.parse.unquote(res.name)
4643+
elif isinstance(res, DataFrame):
4644+
res.columns = [urllib.parse.unquote(col) for col in res.columns]
4645+
else:
4646+
res = self.eval(expr, **kwargs)
46244647

46254648
try:
46264649
result = self.loc[res]

pandas/tests/computation/test_eval.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,64 @@ def test_eval_no_support_column_name(request, column):
19781978
tm.assert_frame_equal(result, expected)
19791979

19801980

1981+
def test_query_on_column_name_with_hashtag_character():
1982+
# GH 59285
1983+
df = DataFrame((1, 2, 3), columns=["a#"])
1984+
result = df.query("`a#` < 2")
1985+
expected = df[df["a#"] < 2]
1986+
tm.assert_frame_equal(result, expected)
1987+
1988+
1989+
def test_query_on_expr_with_comment():
1990+
# GH 59285
1991+
df = DataFrame((1, 2, 3), columns=["a#"])
1992+
result = df.query("`a#` < 2 # This is a comment")
1993+
expected = df[df["a#"] < 2]
1994+
tm.assert_frame_equal(result, expected)
1995+
1996+
1997+
def test_query_on_column_names_with_single_quote_character():
1998+
df = DataFrame(
1999+
[
2000+
{"it's": 1, "that's": 2},
2001+
{"it's": 3, "that's": 4},
2002+
{"it's": -1, "that's": -2},
2003+
{"it's": -3, "that's": -4},
2004+
]
2005+
)
2006+
result = df.query("`it's` < `that's`")
2007+
expected = df[df["it's"] < df["that's"]]
2008+
tm.assert_frame_equal(result, expected)
2009+
2010+
2011+
def test_query_on_column_names_with_double_quote_character():
2012+
df = DataFrame(
2013+
[
2014+
{'it"s': 1, 'that"s': 2},
2015+
{'it"s': 3, 'that"s': 4},
2016+
{'it"s': -1, 'that"s': -2},
2017+
{'it"s': -3, 'that"s': -4},
2018+
]
2019+
)
2020+
result = df.query('`it"s` < `that"s`')
2021+
expected = df[df['it"s'] < df['that"s']]
2022+
tm.assert_frame_equal(result, expected)
2023+
2024+
2025+
def test_query_on_column_names_with_single_quote_and_double_quote_character():
2026+
df = DataFrame(
2027+
[
2028+
{"it's": 1, 'that\'s "nice"': 2},
2029+
{"it's": 3, 'that\'s "nice"': 4},
2030+
{"it's": -1, 'that\'s "nice"': -2},
2031+
{"it's": -3, 'that\'s "nice"': -4},
2032+
]
2033+
)
2034+
result = df.query("`it's` < `that's \"nice\"`")
2035+
expected = df[df["it's"] < df['that\'s "nice"']]
2036+
tm.assert_frame_equal(result, expected)
2037+
2038+
19812039
def test_set_inplace():
19822040
# https://github.com/pandas-dev/pandas/issues/47449
19832041
# Ensure we don't only update the DataFrame inplace, but also the actual

0 commit comments

Comments
 (0)