diff --git a/doc/source/release.rst b/doc/source/release.rst index ed22348e45c9f..fa541baa4e058 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -156,8 +156,8 @@ API Changes - ``to_excel`` now converts ``np.inf`` into a string representation, customizable by the ``inf_rep`` keyword argument (Excel has no native inf representation) (:issue:`6782`) -- Arithmetic ops are now disallowed when passed two bool dtype Series or - DataFrames (:issue:`6762`). +- Arithmetic ops on bool dtype arrays/scalars now give a warning indicating + that they are evaluated in Python space (:issue:`6762`, :issue:`7210`). - Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`) - ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``. The ``table`` keyword can receive the following values. diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 56ea0a361e741..ba67e2cd4d4c8 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -186,17 +186,18 @@ API changes - Added ``factorize`` functions to ``Index`` and ``Series`` to get indexer and unique values (:issue:`7090`) - ``describe`` on a DataFrame with a mix of Timestamp and string like objects returns a different Index (:issue:`7088`). Previously the index was unintentionally sorted. -- arithmetic operations with **only** ``bool`` dtypes now raise an error - (:issue:`7011`, :issue:`6762`, :issue:`7015`) +- arithmetic operations with **only** ``bool`` dtypes warn for ``+``, ``-``, + and ``*`` operations and raise for all others (:issue:`7011`, :issue:`6762`, + :issue:`7015`, :issue:`7210`) .. code-block:: python x = pd.Series(np.random.rand(10) > 0.5) y = True - x * y + x + y # warning generated: should do x | y instead + x / y # this raises because it doesn't make sense - # this now raises for arith ops like ``+``, ``*``, etc. - NotImplementedError: operator '*' not implemented for bool dtypes + NotImplementedError: operator '/' not implemented for bool dtypes .. _whatsnew_0140.display: diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index 4aff00e3a97d9..47d3fce618f89 100644 --- a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -6,6 +6,7 @@ """ +import warnings import numpy as np from pandas.core.common import _values_from_object from distutils.version import LooseVersion @@ -170,11 +171,23 @@ def _has_bool_dtype(x): return isinstance(x, (bool, np.bool_)) -def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('+', '*', '-', '/', - '//', '**'))): - if op_str in not_allowed and _has_bool_dtype(a) and _has_bool_dtype(b): - raise NotImplementedError("operator %r not implemented for bool " - "dtypes" % op_str) +def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), + unsupported=None): + if unsupported is None: + unsupported = {'+': '|', '*': '&', '-': '^'} + + if _has_bool_dtype(a) and _has_bool_dtype(b): + if op_str in unsupported: + warnings.warn("evaluating in Python space because the %r operator" + " is not supported by numexpr for the bool " + "dtype, use %r instead" % (op_str, + unsupported[op_str])) + return False + + if op_str in not_allowed: + raise NotImplementedError("operator %r not implemented for bool " + "dtypes" % op_str) + return True def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, @@ -193,7 +206,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, return the results use_numexpr : whether to try to use numexpr (default True) """ - _bool_arith_check(op_str, a, b) + use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) if use_numexpr: return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 777acdf30f1a0..8d012b871d8ca 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -343,8 +343,8 @@ def testit(): def test_bool_ops_raise_on_arithmetic(self): df = DataFrame({'a': np.random.rand(10) > 0.5, 'b': np.random.rand(10) > 0.5}) - names = 'add', 'mul', 'sub', 'div', 'truediv', 'floordiv', 'pow' - ops = '+', '*', '-', '/', '/', '//', '**' + names = 'div', 'truediv', 'floordiv', 'pow' + ops = '/', '/', '//', '**' msg = 'operator %r not implemented for bool dtypes' for op, name in zip(ops, names): if not compat.PY3 or name != 'div': @@ -369,6 +369,49 @@ def test_bool_ops_raise_on_arithmetic(self): with tm.assertRaisesRegexp(TypeError, err_msg): f(df, True) + def test_bool_ops_warn_on_arithmetic(self): + n = 10 + df = DataFrame({'a': np.random.rand(n) > 0.5, + 'b': np.random.rand(n) > 0.5}) + names = 'add', 'mul', 'sub' + ops = '+', '*', '-' + subs = {'+': '|', '*': '&', '-': '^'} + sub_funcs = {'|': 'or_', '&': 'and_', '^': 'xor'} + for op, name in zip(ops, names): + f = getattr(operator, name) + fe = getattr(operator, sub_funcs[subs[op]]) + + with tm.use_numexpr(True, min_elements=5): + with tm.assert_produces_warning(): + r = f(df, df) + e = fe(df, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df.a, df.b) + e = fe(df.a, df.b) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df.a, True) + e = fe(df.a, True) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(False, df.a) + e = fe(False, df.a) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(False, df) + e = fe(False, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df, True) + e = fe(df, True) + tm.assert_frame_equal(r, e) + if __name__ == '__main__': import nose diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1235aa3cc89d9..e74cf487e75ac 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -35,6 +35,8 @@ raise_with_traceback, httplib ) +from pandas.computation import expressions as expr + from pandas import bdate_range from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex @@ -1576,3 +1578,14 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): np.random.set_state(self.start_state) + + +@contextmanager +def use_numexpr(use, min_elements=expr._MIN_ELEMENTS): + olduse = expr._USE_NUMEXPR + oldmin = expr._MIN_ELEMENTS + expr.set_use_numexpr(use) + expr._MIN_ELEMENTS = min_elements + yield + expr._MIN_ELEMENTS = oldmin + expr.set_use_numexpr(olduse)