From a4a7b112d848fb247ab84b43893fe6013e3c821e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Jan 2022 19:16:54 -0800 Subject: [PATCH 1/3] BUG: Series.mask with small int dtypes raising --- pandas/core/array_algos/putmask.py | 55 -------------------- pandas/core/dtypes/cast.py | 1 - pandas/core/internals/blocks.py | 14 ++--- pandas/tests/series/indexing/test_setitem.py | 42 +++++++++++++++ 4 files changed, 47 insertions(+), 65 deletions(-) diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 1082f8d71af01..03fb2d78cb5d4 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -15,9 +15,7 @@ from pandas.compat import np_version_under1p20 from pandas.core.dtypes.cast import ( - can_hold_element, convert_scalar_for_putitemlike, - find_common_type, infer_dtype_from, ) from pandas.core.dtypes.common import is_list_like @@ -61,59 +59,6 @@ def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) np.putmask(values, mask, value) -def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.ndarray: - """ - Return a new ndarray, try to preserve dtype if possible. - - Parameters - ---------- - values : np.ndarray - `values`, updated in-place. - mask : np.ndarray[bool] - Applies to both sides (array like). - new : listlike `new values` aligned with `values` - - Returns - ------- - values : ndarray with updated values - this *may* be a copy of the original - - See Also - -------- - np.putmask - """ - # we cannot use np.asarray() here as we cannot have conversions - # that numpy does when numeric are mixed with strings - - # see if we are only masking values that if putted - # will work in the current dtype - try: - nn = new[mask] - except TypeError: - # TypeError: only integer scalar arrays can be converted to a scalar index - pass - else: - # We only get to putmask_smart when we cannot hold 'new' in values. - # The "smart" part of putmask_smart is checking if we can hold new[mask] - # in values, in which case we can still avoid the need to cast. - if can_hold_element(values, nn): - values[mask] = nn - return values - - new = np.asarray(new) - - if values.dtype.kind == new.dtype.kind: - # preserves dtype if possible - np.putmask(values, mask, new) - return values - - dtype = find_common_type([values.dtype, new.dtype]) - values = values.astype(dtype) - - np.putmask(values, mask, new) - return values - - def putmask_without_repeat( values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any ) -> None: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1645ee13724b3..b1d7de0515998 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1980,7 +1980,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if tipo.kind not in ["i", "u"]: if isinstance(element, np.ndarray) and element.dtype.kind == "f": # If all can be losslessly cast to integers, then we can hold them - # We do something similar in putmask_smart casted = element.astype(dtype) comp = casted == element if comp.all(): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4f530ffc5ed5d..217a0a00b3ce0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -75,7 +75,6 @@ from pandas.core.array_algos.putmask import ( extract_bool_array, putmask_inplace, - putmask_smart, putmask_without_repeat, setitem_datetimelike_compat, validate_putmask, @@ -978,15 +977,12 @@ def putmask(self, mask, new) -> list[Block]: # no need to split columns if not is_list_like(new): - # putmask_smart can't save us the need to cast + # using just new[indexer] can't save us the need to cast return self.coerce_to_target_dtype(new).putmask(mask, new) - - # This differs from - # `self.coerce_to_target_dtype(new).putmask(mask, new)` - # because putmask_smart will check if new[mask] may be held - # by our dtype. - nv = putmask_smart(values.T, mask, new).T - return [self.make_block(nv)] + else: + indexer = mask.nonzero()[0] + nb = self.setitem(indexer, new[indexer]) + return [nb] else: is_array = isinstance(new, np.ndarray) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 667dae55ef9df..24a77ba3e0244 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -364,6 +364,48 @@ def test_setitem_nan_with_bool(self): expected = Series([np.nan, False, True], dtype=object) tm.assert_series_equal(result, expected) + def test_setitem_mask_smallint_upcast(self): + orig = Series([1, 2, 3], dtype="int8") + alt = np.array([999, 1000, 1001]) + + mask = np.array([True, False, True]) + + ser = orig.copy() + ser[mask] = Series(alt) + expected = Series([999, 2, 1001]) + tm.assert_series_equal(ser, expected) + + ser2 = orig.copy() + ser2.mask(mask, alt, inplace=True) + tm.assert_series_equal(ser2, expected) + + ser3 = orig.copy() + res = ser3.where(~mask, Series(alt)) + tm.assert_series_equal(res, expected) + + def test_setitem_mask_smallint_no_upcast(self): + # like test_setitem_mask_smallint_upcast, but while we can't hold 'alt', + # we *can* hold alt[mask] without casting + orig = Series([1, 2, 3], dtype="uint8") + alt = Series([245, 1000, 246]) + + mask = np.array([True, False, True]) + + ser = orig.copy() + ser[mask] = alt + expected = Series([245, 2, 246], dtype="uint8") + tm.assert_series_equal(ser, expected) + + ser2 = orig.copy() + ser2.mask(mask, alt, inplace=True) + tm.assert_series_equal(ser2, expected) + + # FIXME: don't leave commented-out + # FIXME: ser.where(~mask, alt) unnecessarily upcasts to int64 + # ser3 = orig.copy() + # res = ser3.where(~mask, alt) + # tm.assert_series_equal(res, expected) + class TestSetitemViewCopySemantics: def test_setitem_invalidates_datetime_index_freq(self): From 4d7ff62ffe0e792dcc0cbd7bc3a7198f8b104803 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Jan 2022 20:37:57 -0800 Subject: [PATCH 2/3] 32bit compat --- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 24a77ba3e0244..e1d13410ca71f 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -366,7 +366,7 @@ def test_setitem_nan_with_bool(self): def test_setitem_mask_smallint_upcast(self): orig = Series([1, 2, 3], dtype="int8") - alt = np.array([999, 1000, 1001]) + alt = np.array([999, 1000, 1001], dtype=np.int64) mask = np.array([True, False, True]) @@ -387,7 +387,7 @@ def test_setitem_mask_smallint_no_upcast(self): # like test_setitem_mask_smallint_upcast, but while we can't hold 'alt', # we *can* hold alt[mask] without casting orig = Series([1, 2, 3], dtype="uint8") - alt = Series([245, 1000, 246]) + alt = Series([245, 1000, 246], dtype=np.int64) mask = np.array([True, False, True]) From 0ea6bacf78edc5dcf9bb9c898ef4d3cc65818a1b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Feb 2022 09:31:21 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 063e0c7512f4d..3829306f77a0b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -271,6 +271,7 @@ Indexing - Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtpye :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) +- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)