From b49bda489545a23b95437ca83e466db68bc92061 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 6 Jul 2019 15:14:02 -0700 Subject: [PATCH 1/9] move away from runtime imports --- pandas/core/computation/expressions.py | 7 +++---- pandas/core/ops/__init__.py | 18 +++++++----------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index dc4e6e85f6e7d..ea61467080291 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -12,9 +12,10 @@ from pandas._config import get_option +from pandas._libs.lib import values_from_object + from pandas.core.dtypes.generic import ABCDataFrame -import pandas.core.common as com from pandas.core.computation.check import _NUMEXPR_INSTALLED if _NUMEXPR_INSTALLED: @@ -129,9 +130,7 @@ def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwa def _where_standard(cond, a, b): return np.where( - com.values_from_object(cond), - com.values_from_object(a), - com.values_from_object(b), + values_from_object(cond), values_from_object(a), values_from_object(b) ) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index df2907bf591dd..4f88b12fcd647 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -11,7 +11,7 @@ import numpy as np -from pandas._libs import lib, ops as libops +from pandas._libs import Timedelta, Timestamp, lib, ops as libops from pandas.errors import NullFrequencyError from pandas.util._decorators import Appender @@ -49,6 +49,7 @@ import pandas as pd from pandas._typing import ArrayLike import pandas.core.common as com +from pandas.core.computation import expressions from . import missing from .roperator import ( # noqa:F401 @@ -86,7 +87,7 @@ def get_op_result_name(left, right): Usually a string """ # `left` is always a pd.Series when called from within ops - if isinstance(right, (ABCSeries, pd.Index)): + if isinstance(right, (ABCSeries, ABCIndexClass)): name = _maybe_match_name(left, right) else: name = left.name @@ -150,14 +151,14 @@ def maybe_upcast_for_op(obj): # GH#22390 cast up to Timedelta to rely on Timedelta # implementation; otherwise operation against numeric-dtype # raises TypeError - return pd.Timedelta(obj) + return Timedelta(obj) elif isinstance(obj, np.timedelta64) and not isna(obj): # In particular non-nanosecond timedelta64 needs to be cast to # nanoseconds, or else we get undesired behavior like # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') # The isna check is to avoid casting timedelta64("NaT"), which would # return NaT and incorrectly be treated as a datetime-NaT. - return pd.Timedelta(obj) + return Timedelta(obj) elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj): # GH#22390 Unfortunately we need to special-case right-hand # timedelta64 dtypes because numpy casts integer dtypes to @@ -1253,7 +1254,6 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. - import pandas.core.computation.expressions as expressions right = lib.item_from_zerodim(right) if lib.is_scalar(right) or np.ndim(right) == 0: @@ -1661,8 +1661,6 @@ def na_op(x, y): ------ TypeError : invalid operation """ - import pandas.core.computation.expressions as expressions - try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: @@ -1849,7 +1847,7 @@ def wrapper(self, other, axis=None): ) msg = "\n".join(textwrap.wrap(msg.format(future=future))) warnings.warn(msg, FutureWarning, stacklevel=2) - other = pd.Timestamp(other) + other = Timestamp(other) res_values = dispatch_to_index_op(op, self, other, pd.DatetimeIndex) @@ -1875,7 +1873,7 @@ def wrapper(self, other, axis=None): res_values, index=self.index, name=res_name ).rename(res_name) - elif isinstance(other, (np.ndarray, pd.Index)): + elif isinstance(other, (np.ndarray, ABCIndexClass)): # do not check length of zerodim array # as it will broadcast if other.ndim != 0 and len(self) != len(other): @@ -2143,8 +2141,6 @@ def _arith_method_FRAME(cls, op, special): default_axis = _get_frame_op_default_axis(op_name) def na_op(x, y): - import pandas.core.computation.expressions as expressions - try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: From a0fc709ac2e2bcb6d3e726f74cacaf59def41727 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 11:35:44 -0700 Subject: [PATCH 2/9] remove putmask transpose arg, still should double-check new assertions --- pandas/core/generic.py | 1 - pandas/core/internals/blocks.py | 29 ++++++++++++++++------------- pandas/core/internals/managers.py | 2 ++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b79bde9cc3cb1..3c2f6fa294e82 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9163,7 +9163,6 @@ def _where( align=align, inplace=True, axis=block_axis, - transpose=self._AXIS_REVERSED, ) self._update_inplace(new_data) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bf6ebf1abe760..9e73ad5c282d1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -397,10 +397,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): raise ValueError("Limit must be an integer") if limit < 1: raise ValueError("Limit must be greater than 0") - if self.ndim > 2: - raise NotImplementedError( - "number of dimensions for 'fillna' is currently limited to 2" - ) mask[mask.cumsum(self.ndim - 1) > limit] = False if not self._can_hold_na: @@ -430,7 +426,9 @@ def f(m, v, i): return self.split_and_operate(mask, f, inplace) else: - blocks = self.putmask(mask, value, inplace=inplace) + # transpose of value is irrelevant + assert np.ndim(value) <= 1, value + blocks = self.putmask(mask.T, value, inplace=inplace) blocks = [ b.make_block(values=self._try_coerce_result(b.values)) for b in blocks ] @@ -806,8 +804,9 @@ def replace( filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False + assert np.ndim(value) == 0, value try: - blocks = self.putmask(mask, value, inplace=inplace) + blocks = self.putmask(mask.T, value, inplace=inplace) except (TypeError, ValueError): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -853,6 +852,8 @@ def setitem(self, indexer, value): `indexer` is a direct slice/positional indexer. `value` must be a compatible shape. """ + transpose = self.ndim == 2 + # coerce None values, if appropriate if value is None: if self.is_numeric: @@ -901,8 +902,8 @@ def setitem(self, indexer, value): dtype, _ = maybe_promote(arr_value.dtype) values = values.astype(dtype) - transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) - values = transf(values) + if transpose: + values = values.T # length checking check_setitem_lengths(indexer, value, values) @@ -961,10 +962,12 @@ def _is_empty_indexer(indexer): # coerce and try to infer the dtypes of the result values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values)) + if transpose: + values = values.T + block = self.make_block(values) return block - def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): + def putmask(self, mask, new, align=True, inplace=False, axis=0): """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -977,13 +980,12 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False axis : int - transpose : boolean - Set to True if self is stored with axes reversed Returns ------- a list of new blocks, the result of the putmask """ + transpose = self.ndim == 2 new_values = self.values if inplace else self.values.copy() @@ -1630,7 +1632,8 @@ def _replace_coerce( if mask.any(): if not regex: self = self.coerce_to_target_dtype(value) - return self.putmask(mask, value, inplace=inplace) + assert np.ndim(value) == 0 or is_object_dtype(self), (value, self.dtype) + return self.putmask(mask.T, value, inplace=inplace) else: return self._replace_single( to_replace, diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b3c74aaaa5701..d92a605e5d58f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -584,6 +584,7 @@ def convert(self, **kwargs): return self.apply("convert", **kwargs) def replace(self, **kwargs): + assert 'value' in kwargs and np.ndim(kwargs['value']) == 0 return self.apply("replace", **kwargs) def replace_list(self, src_list, dest_list, inplace=False, regex=False): @@ -617,6 +618,7 @@ def comp(s, regex=False): # replace ALWAYS will return a list rb = [blk if inplace else blk.copy()] for i, (s, d) in enumerate(zip(src_list, dest_list)): + # TODO: assert/validate that `d` is always a scalar? new_rb = [] for b in rb: m = masks[i][b.mgr_locs.indexer] From 135f4b4ff92ecbed49b7296f5fb15429b575ed6d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 15:04:41 -0700 Subject: [PATCH 3/9] port some test updates --- pandas/tests/computation/test_eval.py | 2 +- pandas/tests/io/json/test_pandas.py | 1 + pandas/tests/series/test_arithmetic.py | 11 ++--------- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 2fd7c8f04c8be..bc4622e9a7c55 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -411,7 +411,7 @@ def check_single_invert_op(self, lhs, cmp1, rhs): ) def check_compound_invert_op(self, lhs, cmp1, rhs): - skip_these = "in", "not in" + skip_these = ["in", "not in"] ex = "~(lhs {0} rhs)".format(cmp1) msg = ( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a0686b53b83a4..6d532b5c69616 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1432,6 +1432,7 @@ def test_to_jsonl(self): assert result == expected assert_frame_equal(pd.read_json(result, lines=True), df) + # TODO: there is a near-identical test for pytables; can we share? def test_latin_encoding(self): # GH 13774 pytest.skip("encoding not implemented in .to_json(), " "xref #13774") diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 5b57b5ba2dbae..89557445cafb4 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -24,7 +24,7 @@ class TestSeriesFlexArithmetic: ], ) @pytest.mark.parametrize( - "opname", ["add", "sub", "mul", "floordiv", "truediv", "div", "pow"] + "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"] ) def test_flex_method_equivalence(self, opname, ts): # check that Series.{opname} behaves like Series.__{opname}__, @@ -34,15 +34,8 @@ def test_flex_method_equivalence(self, opname, ts): other = ts[1](tser) check_reverse = ts[2] - if opname == "div": - pytest.skip("div test only for Py3") - op = getattr(Series, opname) - - if op == "div": - alt = operator.truediv - else: - alt = getattr(operator, opname) + alt = getattr(operator, opname) result = op(series, other) expected = alt(series, other) From af3409538cd97d58c54cbf996faacee2aab0c613 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 15:06:43 -0700 Subject: [PATCH 4/9] use transpose pattern --- pandas/core/generic.py | 1 + pandas/core/internals/blocks.py | 19 ++++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3c2f6fa294e82..b79bde9cc3cb1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9163,6 +9163,7 @@ def _where( align=align, inplace=True, axis=block_axis, + transpose=self._AXIS_REVERSED, ) self._update_inplace(new_data) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9e73ad5c282d1..1d436bb0010bc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -426,9 +426,7 @@ def f(m, v, i): return self.split_and_operate(mask, f, inplace) else: - # transpose of value is irrelevant - assert np.ndim(value) <= 1, value - blocks = self.putmask(mask.T, value, inplace=inplace) + blocks = self.putmask(mask, value, inplace=inplace) blocks = [ b.make_block(values=self._try_coerce_result(b.values)) for b in blocks ] @@ -804,9 +802,8 @@ def replace( filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False - assert np.ndim(value) == 0, value try: - blocks = self.putmask(mask.T, value, inplace=inplace) + blocks = self.putmask(mask, value, inplace=inplace) except (TypeError, ValueError): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -903,7 +900,7 @@ def setitem(self, indexer, value): values = values.astype(dtype) if transpose: - values = values.T + value = values.T # length checking check_setitem_lengths(indexer, value, values) @@ -963,11 +960,11 @@ def _is_empty_indexer(indexer): # coerce and try to infer the dtypes of the result values = self._try_coerce_and_cast_result(values, dtype) if transpose: - values = values.T + value = values.T block = self.make_block(values) return block - def putmask(self, mask, new, align=True, inplace=False, axis=0): + def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -980,12 +977,13 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0): align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False axis : int + transpose : boolean + Set to True if self is stored with axes reversed Returns ------- a list of new blocks, the result of the putmask """ - transpose = self.ndim == 2 new_values = self.values if inplace else self.values.copy() @@ -1632,8 +1630,7 @@ def _replace_coerce( if mask.any(): if not regex: self = self.coerce_to_target_dtype(value) - assert np.ndim(value) == 0 or is_object_dtype(self), (value, self.dtype) - return self.putmask(mask.T, value, inplace=inplace) + return self.putmask(mask, value, inplace=inplace) else: return self._replace_single( to_replace, From 96345ffe1866e9c8074da50a20edaadce2f752c2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 17:48:27 -0700 Subject: [PATCH 5/9] typo fixup --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1d436bb0010bc..5785dbfbd6cac 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -900,7 +900,7 @@ def setitem(self, indexer, value): values = values.astype(dtype) if transpose: - value = values.T + values = values.T # length checking check_setitem_lengths(indexer, value, values) @@ -960,7 +960,7 @@ def _is_empty_indexer(indexer): # coerce and try to infer the dtypes of the result values = self._try_coerce_and_cast_result(values, dtype) if transpose: - value = values.T + values = values.T block = self.make_block(values) return block From 1d7d3e99aaabc2ec7a5a900881997a9b69dc7352 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 17:54:34 -0700 Subject: [PATCH 6/9] blackify --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d92a605e5d58f..03b505e82565b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -584,7 +584,7 @@ def convert(self, **kwargs): return self.apply("convert", **kwargs) def replace(self, **kwargs): - assert 'value' in kwargs and np.ndim(kwargs['value']) == 0 + assert "value" in kwargs and np.ndim(kwargs["value"]) == 0 return self.apply("replace", **kwargs) def replace_list(self, src_list, dest_list, inplace=False, regex=False): From c3dbdb824aab2e3d80b3d791bdd7797fd222cc70 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 18:26:47 -0700 Subject: [PATCH 7/9] Revert eager expressions import --- pandas/core/ops/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b2a35fca98283..d735ab3ad2535 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -50,7 +50,6 @@ import pandas as pd from pandas._typing import ArrayLike import pandas.core.common as com -from pandas.core.computation import expressions from . import missing from .roperator import ( # noqa:F401 @@ -1255,6 +1254,7 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. + import pandas.core.computation.expressions as expressions right = lib.item_from_zerodim(right) if lib.is_scalar(right) or np.ndim(right) == 0: @@ -1656,6 +1656,8 @@ def na_op(x, y): ------ TypeError : invalid operation """ + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: @@ -2156,6 +2158,8 @@ def _arith_method_FRAME(cls, op, special): default_axis = _get_frame_op_default_axis(op_name) def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: From a3b5fb3fd909b884de4440056872c96112629f1c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 20:11:54 -0700 Subject: [PATCH 8/9] dummy to force CI From 275e2f81bd1367ebeb10685b88b015d1d6750f68 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 9 Jul 2019 07:51:47 -0700 Subject: [PATCH 9/9] add value to signature --- pandas/core/internals/managers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 03b505e82565b..cd678a235cfc1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -583,9 +583,9 @@ def astype(self, dtype, **kwargs): def convert(self, **kwargs): return self.apply("convert", **kwargs) - def replace(self, **kwargs): - assert "value" in kwargs and np.ndim(kwargs["value"]) == 0 - return self.apply("replace", **kwargs) + def replace(self, value, **kwargs): + assert np.ndim(value) == 0, value + return self.apply("replace", value=value, **kwargs) def replace_list(self, src_list, dest_list, inplace=False, regex=False): """ do a list replace """