From 15f0caaad679f2a8f07e4f370b6d67b64049b7ca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Nov 2019 18:40:19 -0800 Subject: [PATCH 01/12] REF: implement scalar ops blockwise --- pandas/core/arrays/datetimelike.py | 31 +++++++++++++++++--- pandas/core/arrays/datetimes.py | 6 +++- pandas/core/arrays/timedeltas.py | 4 +-- pandas/core/internals/blocks.py | 14 ++++++++- pandas/core/internals/managers.py | 40 +++++++++++++------------- pandas/core/ops/__init__.py | 5 ++++ pandas/core/ops/array_ops.py | 21 ++++++++++---- pandas/tests/arithmetic/test_period.py | 8 +++--- pandas/tests/arrays/test_datetimes.py | 4 --- pandas/tests/arrays/test_timedeltas.py | 4 --- pandas/tests/test_expressions.py | 20 ++++++------- 11 files changed, 99 insertions(+), 58 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index dc3c49b7e06a9..46327a09ac375 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -325,6 +325,24 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray) _generate_range """ + @property + def ndim(self) -> int: + return self._data.ndim + + @property + def shape(self): + return self._data.shape + + def reshape(self, *args, **kwargs): + # Note: we drop any freq + data = self._data.reshape(*args, **kwargs) + return type(self)(data, dtype=self.dtype) + + def ravel(self, *args, **kwargs): + # Note: we drop any freq + data = self._data.ravel(*args, **kwargs) + return type(self)(data, dtype=self.dtype) + @property def _box_func(self): """ @@ -413,7 +431,10 @@ def __getitem__(self, key): getitem = self._data.__getitem__ if is_int: val = getitem(key) - return self._box_func(val) + if lib.is_scalar(val): + # i.e. self.ndim == 1 + return self._box_func(val) + return type(self)(val, dtype=self.dtype) if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) @@ -826,6 +847,8 @@ def inferred_freq(self): generated by infer_freq. Returns None if it can't autodetect the frequency. """ + if self.ndim != 1: + return None try: return frequencies.infer_freq(self) except ValueError: @@ -981,7 +1004,7 @@ def _add_timedeltalike_scalar(self, other): """ if isna(other): # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds - new_values = np.empty(len(self), dtype="i8") + new_values = np.empty(self.shape, dtype="i8") new_values[:] = iNaT return new_values @@ -1029,7 +1052,7 @@ def _add_nat(self): # GH#19124 pd.NaT is treated like a timedelta for both timedelta # and datetime dtypes - result = np.zeros(len(self), dtype=np.int64) + result = np.zeros(self.shape, dtype=np.int64) result.fill(iNaT) return type(self)(result, dtype=self.dtype, freq=None) @@ -1043,7 +1066,7 @@ def _sub_nat(self): # For datetime64 dtypes by convention we treat NaT as a datetime, so # this subtraction returns a timedelta64 dtype. # For period dtype, timedelta64 is a close-enough return dtype. - result = np.zeros(len(self), dtype=np.int64) + result = np.zeros(self.shape, dtype=np.int64) result.fill(iNaT) return result.view("timedelta64[ns]") diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 71420e6e58090..5352fdf384ae7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -354,7 +354,7 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): "ndarray, or Series or Index containing one of those." ) raise ValueError(msg.format(type(values).__name__)) - if values.ndim != 1: + if values.ndim not in [1, 2]: raise ValueError("Only 1-dimensional input arrays are supported.") if values.dtype == "i8": @@ -812,6 +812,10 @@ def _sub_datetime_arraylike(self, other): return new_values.view("timedelta64[ns]") def _add_offset(self, offset): + if self.ndim == 2: + # TODO: does order matter here? + return self.ravel()._add_offset(offset).reshape(self.shape) + assert not isinstance(offset, Tick) try: if self.tz is not None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bacd0b9699e93..46d9c2e7574d1 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -221,7 +221,7 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): "ndarray, or Series or Index containing one of those." ) raise ValueError(msg.format(type(values).__name__)) - if values.ndim != 1: + if values.ndim not in [1, 2]: raise ValueError("Only 1-dimensional input arrays are supported.") if values.dtype == "i8": @@ -1075,8 +1075,6 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): ) data = np.array(data, copy=copy) - if data.ndim != 1: - raise ValueError("Only 1-dimensional input arrays are supported.") assert data.dtype == "m8[ns]", data return data, inferred_freq diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2d6ffb7277742..3bbcee0063053 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -371,7 +371,19 @@ def apply(self, func, **kwargs): """ with np.errstate(all="ignore"): result = func(self.values, **kwargs) - if not isinstance(result, Block): + + if is_extension_array_dtype(result) and result.ndim > 1: + # if we 2D ExtensionArray, we need to split it into 1D pieces + nbs = [] + for i, loc in enumerate(self.mgr_locs): + vals = result[i] + nv = _block_shape(vals, ndim=self.ndim) + block = self.make_block(values=nv, placement=[loc]) + nbs.append(block) + return nbs + + if not isinstance(result, Block) and np.ndim(result) != 0: + # Exclude the 0-dim case so we can do reductions result = self.make_block(values=_block_shape(result, ndim=self.ndim)) return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c37a8ea5e42a4..86152cdca0bc1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -346,16 +346,10 @@ def _verify_integrity(self): ) def apply( - self, - f, - axes=None, - filter=None, - do_integrity_check=False, - consolidate=True, - **kwargs, + self, f, axes=None, filter=None, consolidate: bool = True, **kwargs, ): """ - iterate over the blocks, collect and create a new block manager + Iterate over the blocks, collect and create a new BlockManager. Parameters ---------- @@ -363,16 +357,20 @@ def apply( axes : optional (if not supplied, use self.axes) filter : list, if supplied, only call the block if the filter is in the block - do_integrity_check : boolean, default False. Do the block manager - integrity check - consolidate: boolean, default True. Join together blocks having same - dtype + consolidate: bool, default True + Join together blocks having same dtype. Returns ------- - Block Manager (new object) - + BlockManager (new object) """ + if axes is not None: + assert len(axes) == len(self.axes), (axes, self.axes) + for ax, own in zip(axes, self.axes): + assert ax.equals(own), (ax, own) + + # assert filter is None, filter --> nope, in test_replace we + # get e.g. ["a"], ["A"], ["zero"], ["b"] result_blocks = [] @@ -430,14 +428,15 @@ def apply( axis = obj._info_axis_number kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) - applied = getattr(b, f)(**kwargs) + if callable(f): + applied = b.apply(f, **kwargs) + else: + applied = getattr(b, f)(**kwargs) result_blocks = _extend_blocks(applied, result_blocks) if len(result_blocks) == 0: return self.make_empty(axes or self.axes) - bm = self.__class__( - result_blocks, axes or self.axes, do_integrity_check=do_integrity_check - ) + bm = self.__class__(result_blocks, axes or self.axes, do_integrity_check=False) bm._consolidate_inplace() return bm @@ -763,7 +762,7 @@ def copy(self, deep=True): Parameters ---------- - deep : boolean o rstring, default True + deep : boolean or string, default True If False, return shallow copy (do not copy data) If 'all', copy data and a deep copy of the index @@ -773,6 +772,7 @@ def copy(self, deep=True): """ # this preserves the notion of view copying of axes if deep: + # hit in e.g. tests.io.json.test_pandas if deep == "all": copy = lambda ax: ax.copy(deep=True) else: @@ -780,7 +780,7 @@ def copy(self, deep=True): new_axes = [copy(ax) for ax in self.axes] else: new_axes = list(self.axes) - return self.apply("copy", axes=new_axes, deep=deep, do_integrity_check=False) + return self.apply("copy", axes=new_axes, deep=deep) def as_array(self, transpose=False, items=None): """Convert the blockmanager data into an numpy array. diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d14fb040c4e30..5fc6fd6ca8579 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -26,6 +26,7 @@ arithmetic_op, comparison_op, define_na_arithmetic_op, + get_array_op, logical_op, ) from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 @@ -340,6 +341,10 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): right = lib.item_from_zerodim(right) if lib.is_scalar(right) or np.ndim(right) == 0: + array_op = get_array_op(func, str_rep=str_rep) + bm = left._data.apply(array_op, right=right) + return type(left)(bm) + def column_op(a, b): return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 414e241af7bbd..59fd4e0d8c072 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -2,6 +2,7 @@ Functions for arithmetic and comparison operations on NumPy arrays and ExtensionArrays. """ +from functools import partial import operator from typing import Any, Union @@ -54,10 +55,10 @@ def comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, (ABCSeries, ABCIndex)): y = y.values - result = libops.vec_compare(x, y, op) + result = libops.vec_compare(x.ravel(), y, op) else: - result = libops.scalar_compare(x, y, op) - return result + result = libops.scalar_compare(x.ravel(), y, op) + return result.reshape(x.shape) def masked_arith_op(x, y, op): @@ -252,9 +253,9 @@ def comparison_op( elif is_scalar(rvalues) and isna(rvalues): # numpy does not like comparisons vs None if op is operator.ne: - res_values = np.ones(len(lvalues), dtype=bool) + res_values = np.ones(lvalues.shape, dtype=bool) else: - res_values = np.zeros(len(lvalues), dtype=bool) + res_values = np.zeros(lvalues.shape, dtype=bool) elif is_object_dtype(lvalues.dtype): res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) @@ -382,3 +383,13 @@ def fill_bool(x, left=None): res_values = filler(res_values) # type: ignore return res_values + + +def get_array_op(op, str_rep=None): + op_name = op.__name__.strip("_") + if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: + return partial(comparison_op, op=op) + elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: + return partial(logical_op, op=op) + else: + return partial(arithmetic_op, op=op, str_rep=str_rep) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index ed693d873efb8..d4fdeffa2c2db 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -755,18 +755,18 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_offset_n_gt1(self, box_transpose_fail): + def test_pi_add_offset_n_gt1(self, box_with_array): # GH#23215 # add offset to PeriodIndex with freq.n > 1 - box, transpose = box_transpose_fail + box = box_with_array per = pd.Period("2016-01", freq="2M") pi = pd.PeriodIndex([per]) expected = pd.PeriodIndex(["2016-03"], freq="2M") - pi = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + pi = tm.box_expected(pi, box) + expected = tm.box_expected(expected, box) result = pi + per.freq tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index c3cda22497ecb..eebb5e7107e88 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -23,10 +23,6 @@ def test_from_sequence_invalid_type(self): def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 2-dim - DatetimeArray(arr.reshape(2, 2)) - with pytest.raises(ValueError, match="Only 1-dimensional"): # 0-dim DatetimeArray(arr[[0]].squeeze()) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 42e7bee97e671..ed4f3133a6a25 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -11,10 +11,6 @@ def test_only_1dim_accepted(self): # GH#25282 arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 2-dim - TimedeltaArray(arr.reshape(2, 2)) - with pytest.raises(ValueError, match="Only 1-dimensional"): # 0-dim TimedeltaArray(arr[[0]].squeeze()) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 1974f712b13ee..351346e2076d2 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -277,11 +277,9 @@ def test_bool_ops_raise_on_arithmetic(self, op_str, opname): with pytest.raises(NotImplementedError, match=err_msg): f(False, df.a) - with pytest.raises(NotImplementedError, match=err_msg): - f(False, df) + f(False, df) - with pytest.raises(NotImplementedError, match=err_msg): - f(df, True) + f(df, True) @pytest.mark.parametrize( "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")] @@ -321,15 +319,13 @@ def test_bool_ops_warn_on_arithmetic(self, op_str, opname): e = fe(False, df.a) tm.assert_series_equal(r, e) - with tm.assert_produces_warning(check_stacklevel=False): - r = f(False, df) - e = fe(False, df) - tm.assert_frame_equal(r, e) + r = f(False, df) + e = fe(False, df) + tm.assert_frame_equal(r, e) - with tm.assert_produces_warning(check_stacklevel=False): - r = f(df, True) - e = fe(df, True) - tm.assert_frame_equal(r, e) + r = f(df, True) + e = fe(df, True) + tm.assert_frame_equal(r, e) @pytest.mark.parametrize( "test_input,expected", From 016ae649bdecb8450a0a609ec4bf68dc5976d15f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Dec 2019 08:36:11 -0800 Subject: [PATCH 02/12] fix missing name --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a9cbadd2ac6f3..cd444a61704ed 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -753,6 +753,7 @@ def copy(self, deep=True): copy : BlockManager """ # this preserves the notion of view copying of axes + new_axes = self.axes if deep: # hit in e.g. tests.io.json.test_pandas From 453609782eac5ad9d47dad187f6dab368dd46393 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Dec 2019 08:37:32 -0800 Subject: [PATCH 03/12] revert --- pandas/core/internals/managers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cd444a61704ed..0edb5945d813d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -753,7 +753,6 @@ def copy(self, deep=True): copy : BlockManager """ # this preserves the notion of view copying of axes - new_axes = self.axes if deep: # hit in e.g. tests.io.json.test_pandas @@ -764,6 +763,8 @@ def copy_func(ax): return ax.view() new_axes = [copy_func(ax) for ax in self.axes] + else: + new_axes = list(self.axes) res = self.apply("copy", deep=deep) res.axes = new_axes From 1fc1e3ec33be2e439d0280cb42a51d613f35fce6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 16:41:09 -0800 Subject: [PATCH 04/12] Fix numexpr tests --- pandas/core/arrays/datetimes.py | 1 - pandas/core/internals/blocks.py | 4 ++-- pandas/core/internals/managers.py | 2 -- pandas/core/ops/__init__.py | 2 +- pandas/tests/arrays/test_datetimes.py | 4 ++++ pandas/tests/arrays/test_timedeltas.py | 5 +++++ pandas/tests/test_expressions.py | 20 ++++++++++++-------- 7 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 71e9ced3f5b2d..280fda60f5f7f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -789,7 +789,6 @@ def _sub_datetime_arraylike(self, other): def _add_offset(self, offset): if self.ndim == 2: - # TODO: does order matter here? return self.ravel()._add_offset(offset).reshape(self.shape) assert not isinstance(offset, Tick) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4b1389a4465c1..c7c7e8d94fd3b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -370,7 +370,7 @@ def apply(self, func, **kwargs): result = func(self.values, **kwargs) if is_extension_array_dtype(result) and result.ndim > 1: - # if we 2D ExtensionArray, we need to split it into 1D pieces + # if we get a 2D ExtensionArray, we need to split it into 1D pieces nbs = [] for i, loc in enumerate(self.mgr_locs): vals = result[i] @@ -379,7 +379,7 @@ def apply(self, func, **kwargs): nbs.append(block) return nbs - if not isinstance(result, Block) and np.ndim(result) != 0: + if not isinstance(result, Block):# and np.ndim(result) != 0: # Exclude the 0-dim case so we can do reductions result = self.make_block(values=_block_shape(result, ndim=self.ndim)) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 144f22252ed8e..fc926f1a072ba 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -754,7 +754,6 @@ def copy(self, deep=True): """ # this preserves the notion of view copying of axes if deep: - # hit in e.g. tests.io.json.test_pandas def copy_func(ax): if deep == "all": @@ -765,7 +764,6 @@ def copy_func(ax): new_axes = [copy_func(ax) for ax in self.axes] else: new_axes = list(self.axes) - res = self.apply("copy", deep=deep) res.axes = new_axes return res diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 03bad0c6e1b18..24d1ebe77ade3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -718,7 +718,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if fill_value is not None: self = self.fillna(fill_value) - new_data = dispatch_to_series(self, other, op) + new_data = dispatch_to_series(self, other, op, str_rep) return self._construct_result(new_data) f.__name__ = op_name diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index eebb5e7107e88..d5ec473f4c74d 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -23,6 +23,10 @@ def test_from_sequence_invalid_type(self): def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + DatetimeArray(arr.reshape(2, 2, 1)) + with pytest.raises(ValueError, match="Only 1-dimensional"): # 0-dim DatetimeArray(arr[[0]].squeeze()) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index ed4f3133a6a25..f151623044050 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -11,6 +11,11 @@ def test_only_1dim_accepted(self): # GH#25282 arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + TimedeltaArray(arr.reshape(2, 2, 1)) + with pytest.raises(ValueError, match="Only 1-dimensional"): # 0-dim TimedeltaArray(arr[[0]].squeeze()) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index b6753cedfdf46..9808c3d78b436 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -277,9 +277,11 @@ def test_bool_ops_raise_on_arithmetic(self, op_str, opname): with pytest.raises(NotImplementedError, match=err_msg): f(False, df.a) - f(False, df) + with pytest.raises(NotImplementedError, match=err_msg): + f(False, df) - f(df, True) + with pytest.raises(NotImplementedError, match=err_msg): + f(df, True) @pytest.mark.parametrize( "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")] @@ -319,13 +321,15 @@ def test_bool_ops_warn_on_arithmetic(self, op_str, opname): e = fe(False, df.a) tm.assert_series_equal(r, e) - r = f(False, df) - e = fe(False, df) - tm.assert_frame_equal(r, e) + with tm.assert_produces_warning(check_stacklevel=False): + r = f(False, df) + e = fe(False, df) + tm.assert_frame_equal(r, e) - r = f(df, True) - e = fe(df, True) - tm.assert_frame_equal(r, e) + with tm.assert_produces_warning(check_stacklevel=False): + r = f(df, True) + e = fe(df, True) + tm.assert_frame_equal(r, e) @pytest.mark.parametrize( "test_input,expected", From 657d1bb4de8ff271bc12d1a729c8fcfaa891f691 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 17:21:47 -0800 Subject: [PATCH 05/12] ADD asv --- asv_bench/benchmarks/binary_ops.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 58e0db67d6025..510c63971a8ae 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,3 +1,4 @@ +import operator import numpy as np from pandas import DataFrame, Series, date_range @@ -9,6 +10,30 @@ import pandas.computation.expressions as expr +class IntFrameWithScalar: + params = [ + [np.float64, np.int64], + [2, 3.0, np.int32(4), np.float64(5)], + [ + operator.add, operator.sub, + operator.mul, operator.truediv, operator.floordiv, + operator.pow, operator.mod, + operator.and_, operator.or_, operator.xor, + operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le + ] + ] + param_names = ["dtype", "scalar", "op"] + + def setup(self, dtype, scalar, op): + arr = np.random.randn(20000, 100) + self.df = DataFrame(arr.astype(dtype)) + + def time_frame_op_with_scalar(self, dtype, scalar, op): + op(self.df, scalar) + + class Ops: params = [[True, False], ["default", 1]] From 66d34c2a13d0fef6e21052a262390cb20db74630 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 17:23:37 -0800 Subject: [PATCH 06/12] remove commented-out --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c7c7e8d94fd3b..ac6dc1c87fbac 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -379,7 +379,7 @@ def apply(self, func, **kwargs): nbs.append(block) return nbs - if not isinstance(result, Block):# and np.ndim(result) != 0: + if not isinstance(result, Block): # Exclude the 0-dim case so we can do reductions result = self.make_block(values=_block_shape(result, ndim=self.ndim)) From 0f2677562f0dbaa92b143c7c4137f8bc7613630c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 17:35:10 -0800 Subject: [PATCH 07/12] Whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a15d5b319fc82..16c72ada2633c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -655,6 +655,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`) - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) - Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) From a0e4adc8a327b1a21674a3cff8ed86b915693942 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 18:11:47 -0800 Subject: [PATCH 08/12] blackify --- asv_bench/benchmarks/binary_ops.py | 25 +++++++++++++++++-------- pandas/tests/arrays/test_timedeltas.py | 1 - 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 510c63971a8ae..6a9d0d7fcc9c1 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -15,14 +15,23 @@ class IntFrameWithScalar: [np.float64, np.int64], [2, 3.0, np.int32(4), np.float64(5)], [ - operator.add, operator.sub, - operator.mul, operator.truediv, operator.floordiv, - operator.pow, operator.mod, - operator.and_, operator.or_, operator.xor, - operator.eq, operator.ne, - operator.gt, operator.ge, - operator.lt, operator.le - ] + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.pow, + operator.mod, + operator.and_, + operator.or_, + operator.xor, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ], ] param_names = ["dtype", "scalar", "op"] diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index f151623044050..d40c4838e1ef6 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -11,7 +11,6 @@ def test_only_1dim_accepted(self): # GH#25282 arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") - with pytest.raises(ValueError, match="Only 1-dimensional"): # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 TimedeltaArray(arr.reshape(2, 2, 1)) From 23d5c48b4c566b8ccac7f26ab6fc7bfb7dcef7e9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 18:12:16 -0800 Subject: [PATCH 09/12] isort fixup --- asv_bench/benchmarks/binary_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 6a9d0d7fcc9c1..d7e9f6f974b4c 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,4 +1,5 @@ import operator + import numpy as np from pandas import DataFrame, Series, date_range From 2228f5e2ed8217d7a45fa2437366ad5ee8e69b34 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 19:01:58 -0800 Subject: [PATCH 10/12] remoe asv params that fail in ci --- asv_bench/benchmarks/binary_ops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index d7e9f6f974b4c..64e067d25a454 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -23,9 +23,6 @@ class IntFrameWithScalar: operator.floordiv, operator.pow, operator.mod, - operator.and_, - operator.or_, - operator.xor, operator.eq, operator.ne, operator.gt, From 2f80502fada88e718918f7972f8ba07fcaba2e1d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Dec 2019 09:24:23 -0800 Subject: [PATCH 11/12] comment+docstring --- pandas/core/ops/__init__.py | 1 + pandas/core/ops/array_ops.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 24d1ebe77ade3..9a62b2c24237b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -373,6 +373,7 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): right = lib.item_from_zerodim(right) if lib.is_scalar(right) or np.ndim(right) == 0: + # Get the appropriate array-op to apply to each block's values. array_op = get_array_op(func, str_rep=str_rep) bm = left._data.apply(array_op, right=right) return type(left)(bm) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 0bd0c7a850981..e0ddd17335175 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -4,7 +4,7 @@ """ from functools import partial import operator -from typing import Any, Union +from typing import Any, Optional, Union import numpy as np @@ -370,7 +370,21 @@ def fill_bool(x, left=None): return res_values -def get_array_op(op, str_rep=None): +def get_array_op(op, str_rep: Optional[str] = None): + """ + Return a binary array operation corresponding to the given operator op. + + Parameters + ---------- + op : function + Binary operator from operator or roperator module. + str_rep : str or None, default None + str_rep to pass to arithmetic_op + + Returns + ------- + function + """ op_name = op.__name__.strip("_") if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: return partial(comparison_op, op=op) From cf94d1305e9e803dbea4bafe891b918b1b07844b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 08:35:18 -0800 Subject: [PATCH 12/12] remove unreacahble --- pandas/core/ops/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 9a62b2c24237b..be5e53eaa6721 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -378,9 +378,6 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): bm = left._data.apply(array_op, right=right) return type(left)(bm) - def column_op(a, b): - return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} - elif isinstance(right, ABCDataFrame): assert right._indexed_same(left)