Skip to content

BUG: df.eval can't concatenate string column and string via + #47734

Open
@Zeroto521

Description

@Zeroto521

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

In [1]: import pandas as pd

In [2]: df = pd.DataFrame({'a': ['a', 'b'], 'b': ['1', '2']})

In [3]: df
Out[3]:
   a  b
0  a  1
1  b  2

In [4]: df.eval('a + "a"')  # fail

TypeError: unsupported operand type(s) for +: 'object' and '<class 'str'>'

In [5]: df.convert_dtypes().eval('a + "a"')  # fail too

TypeError: Cannot interpret 'string[python]' as a data type

In [6]: df.eval("a + b")  # work
Out[6]:
0    a1
1    b2
dtype: object

In [7]: df.dtypes
Out[7]:
a    object
b    object
dtype: object

In [8]: df.convert_dtypes().dtypes
Out[8]:
a    string
b    string
dtype: object
`df.eval('a + "a"')` error messages
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-4-588814d68130> in <cell line: 1>()
----> 1 df.eval('a + "a"')

~\mambaforge\envs\work\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
   4238         kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + resolvers
   4239
-> 4240         return _eval(expr, inplace=inplace, **kwargs)
   4241
   4242     def select_dtypes(self, include=None, exclude=None) -> DataFrame:

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    348         )
    349
--> 350         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)
    351
    352         # construct the engine and evaluate the parsed expression

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in __init__(self, expr, engine, parser, env, level)
    809         self.parser = parser
    810         self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
--> 811         self.terms = self.parse()
    812
    813     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in parse(self)
    828         Parse an expression.
    829         """
--> 830         return self._visitor.visit(self.expr)
    831
    832     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_Module(self, node, **kwargs)
    419             raise SyntaxError("only a single expression is allowed")
    420         expr = node.body[0]
--> 421         return self.visit(expr, **kwargs)
    422
    423     def visit_Expr(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_Expr(self, node, **kwargs)
    422
    423     def visit_Expr(self, node, **kwargs):
--> 424         return self.visit(node.value, **kwargs)
    425
    426     def _rewrite_membership_op(self, node, left, right):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_BinOp(self, node, **kwargs)
    536         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    537         left, right = self._maybe_downcast_constants(left, right)
--> 538         return self._maybe_evaluate_binop(op, op_class, left, right)
    539
    540     def visit_Div(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in _maybe_evaluate_binop(self, op, op_class, lhs, rhs, eval_in_python, maybe_eval_in_python)
    506
    507         if res.has_invalid_return_type:
--> 508             raise TypeError(
    509                 f"unsupported operand type(s) for {res.op}: "
    510                 f"'{lhs.type}' and '{rhs.type}'"

TypeError: unsupported operand type(s) for +: 'object' and '<class 'str'>'
`df.convert_dtypes().eval('a + "a"')` error messages
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-42cd31671fee> in <cell line: 1>()
----> 1 df.convert_dtypes().eval('a + "a"')

~\mambaforge\envs\work\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
   4238         kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + resolvers
   4239
-> 4240         return _eval(expr, inplace=inplace, **kwargs)
   4241
   4242     def select_dtypes(self, include=None, exclude=None) -> DataFrame:

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    348         )
    349
--> 350         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)
    351
    352         # construct the engine and evaluate the parsed expression

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in __init__(self, expr, engine, parser, env, level)
    809         self.parser = parser
    810         self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
--> 811         self.terms = self.parse()
    812
    813     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in parse(self)
    828         Parse an expression.
    829         """
--> 830         return self._visitor.visit(self.expr)
    831
    832     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_Module(self, node, **kwargs)
    419             raise SyntaxError("only a single expression is allowed")
    420         expr = node.body[0]
--> 421         return self.visit(expr, **kwargs)
    422
    423     def visit_Expr(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_Expr(self, node, **kwargs)
    422
    423     def visit_Expr(self, node, **kwargs):
--> 424         return self.visit(node.value, **kwargs)
    425
    426     def _rewrite_membership_op(self, node, left, right):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    413         method = "visit_" + type(node).__name__
    414         visitor = getattr(self, method)
--> 415         return visitor(node, **kwargs)
    416
    417     def visit_Module(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in visit_BinOp(self, node, **kwargs)
    536         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    537         left, right = self._maybe_downcast_constants(left, right)
--> 538         return self._maybe_evaluate_binop(op, op_class, left, right)
    539
    540     def visit_Div(self, node, **kwargs):

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\expr.py in _maybe_evaluate_binop(self, op, op_class, lhs, rhs, eval_in_python, maybe_eval_in_python)
    505         res = op(lhs, rhs)
    506
--> 507         if res.has_invalid_return_type:
    508             raise TypeError(
    509                 f"unsupported operand type(s) for {res.op}: "

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\ops.py in has_invalid_return_type(self)
    243         types = self.operand_types
    244         obj_dtype_set = frozenset([np.dtype("object")])
--> 245         return self.return_type == object and types - obj_dtype_set
    246
    247     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\ops.py in return_type(self)
    237         if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
    238             return np.bool_
--> 239         return result_type_many(*(term.type for term in com.flatten(self)))
    240
    241     @property

~\mambaforge\envs\work\lib\site-packages\pandas\core\computation\common.py in result_type_many(*arrays_and_dtypes)
     21     """
     22     try:
---> 23         return np.result_type(*arrays_and_dtypes)
     24     except ValueError:
     25         # we have > NPY_MAXARGS terms in our expression

~\mambaforge\envs\work\lib\site-packages\numpy\core\overrides.py in result_type(*args, **kwargs)

TypeError: Cannot interpret 'string[python]' as a data type

Issue Description

df.eval can't concatenate a string column and a string literal via the + operator, e.g. df.eval("string_column + 'a string'").

I also tried some other cases, but they also failed.

  • df.astype(str).eval("a + 'a'") fails
  • df.eval("a + 'a'", engine="python") fails

Expected Behavior

>>> df.eval("a + 'a'")
0    aa
1    ba
Name: a, dtype: object

Installed Versions

While running pd.show_versions() I noticed that I don't have 'numexpr' installed.
After installing 'numexpr', the above cases still did not work.

INSTALLED VERSIONS

commit : e8093ba
python : 3.9.13.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.19044
machine : AMD64
processor : Intel64 Family 6 Model 158 Stepping 10, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : Chinese (Simplified)_China.936

pandas : 1.4.3
numpy : 1.22.4
pytz : 2022.1
dateutil : 2.8.2
setuptools : 62.6.0
pip : 22.1.2
Cython : None
pytest : None
hypothesis : None
sphinx : 5.0.2
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 7.33.0
pandas_datareader: None
bs4 : 4.11.1
bottleneck : None
brotli :
fastparquet : None
fsspec : None
gcsfs : None
markupsafe : 2.1.1
matplotlib : 3.5.2
numba : None
numexpr : None
odfpy : None
openpyxl : 3.0.9
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.8.1
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : 2022.3.0
xlrd : 1.2.0
xlwt : 1.3.0
zstandard : None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions