diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 205bbcc07fc76..5303d337c8cd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4331,7 +4331,15 @@ def where(self, cond, other=None): except (ValueError, TypeError): return self.astype(object).where(cond, other) - values = np.where(cond, values, other) + if isinstance(other, np.timedelta64) and self.dtype == object: + # https://github.com/numpy/numpy/issues/12550 + # timedelta64 will incorrectly cast to int + other = [other] * (~cond).sum() + values = cast(np.ndarray, values).copy() + # error: Unsupported target for indexed assignment ("ArrayLike") + values[~cond] = other # type:ignore[index] + else: + values = np.where(cond, values, other) return Index(values, name=self.name) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 77f4263214529..338f949e00142 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -29,7 +29,6 @@ infer_dtype_from, maybe_downcast_numeric, maybe_downcast_to_dtype, - maybe_promote, maybe_upcast, soft_convert_objects, ) @@ -1031,6 +1030,12 @@ def putmask(self, mask, new) -> List[Block]: elif not mask.any(): return [self] + elif isinstance(new, np.timedelta64): + # using putmask with object dtype will incorrectly cast to object + # Having excluded self._can_hold_element, we know we cannot operate + # in-place, so we are safe using `where` + return self.where(new, ~mask) + else: # may need to upcast if transpose: @@ -1052,7 +1057,7 @@ def f(mask, val, idx): n = np.array(new) # type of the new block - dtype, _ = maybe_promote(n.dtype) + dtype = find_common_type([n.dtype, val.dtype]) # we need to explicitly astype here to make a copy n = n.astype(dtype) @@ -1311,12 +1316,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: blocks = block.where(orig_other, cond, errors=errors, axis=axis) return self._maybe_downcast(blocks, "infer") - # convert datetime 
to datetime64, timedelta to timedelta64 - other = convert_scalar_for_putitemlike(other, values.dtype) + elif isinstance(other, np.timedelta64): + # expressions.where will cast np.timedelta64 to int + result = self.values.copy() + result[~cond] = [other] * (~cond).sum() + + else: + # convert datetime to datetime64, timedelta to timedelta64 + other = convert_scalar_for_putitemlike(other, values.dtype) - # By the time we get here, we should have all Series/Index - # args extracted to ndarray - result = expressions.where(cond, values, other) + # By the time we get here, we should have all Series/Index + # args extracted to ndarray + result = expressions.where(cond, values, other) if self._can_hold_na or self.ndim == 1: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 8c40ef6261d19..bbf3715d8e022 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.compat import np_version_under1p20 + from pandas import ( DatetimeIndex, Index, @@ -516,25 +518,127 @@ def test_setitem_slice_into_readonly_backing_data(): assert not array.any() -@pytest.mark.parametrize( - "key", [0, slice(0, 1), [0], np.array([0]), range(1)], ids=type -) -@pytest.mark.parametrize("dtype", [complex, int, float]) -def test_setitem_td64_into_complex(key, dtype, indexer_sli): - # timedelta64 should not be treated as integers - arr = np.arange(5).astype(dtype) - ser = Series(arr) - td = np.timedelta64(4, "ns") - - indexer_sli(ser)[key] = td - assert ser.dtype == object - assert arr[0] == 0 # original array is unchanged - - if not isinstance(key, int) and not ( - indexer_sli is tm.loc and isinstance(key, slice) - ): - # skip key/indexer_sli combinations that will have mismatched lengths +class TestSetitemCastingEquivalentsTimedelta64IntoNumeric: + # timedelta64 should not be treated as integers when setting into + # numeric Series + + 
@pytest.fixture + def val(self): + td = np.timedelta64(4, "ns") + return td + return np.full((1,), td) + + @pytest.fixture(params=[complex, int, float]) + def dtype(self, request): + return request.param + + @pytest.fixture + def obj(self, dtype): + arr = np.arange(5).astype(dtype) + ser = Series(arr) + return ser + + @pytest.fixture + def expected(self, dtype): + arr = np.arange(5).astype(dtype) ser = Series(arr) - indexer_sli(ser)[key] = np.full((1,), td) - assert ser.dtype == object - assert arr[0] == 0 # original array is unchanged + ser = ser.astype(object) + ser.values[0] = np.timedelta64(4, "ns") + return ser + + @pytest.fixture + def key(self): + return 0 + + def check_indexer(self, obj, key, expected, val, indexer): + orig = obj + obj = obj.copy() + arr = obj._values + + indexer(obj)[key] = val + tm.assert_series_equal(obj, expected) + + tm.assert_equal(arr, orig._values) # original array is unchanged + + def test_int_key(self, obj, key, expected, val, indexer_sli): + if not isinstance(key, int): + return + + self.check_indexer(obj, key, expected, val, indexer_sli) + + rng = range(key, key + 1) + self.check_indexer(obj, rng, expected, val, indexer_sli) + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + slc = slice(key, key + 1) + self.check_indexer(obj, slc, expected, val, indexer_sli) + + ilkey = [key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli) + + def test_slice_key(self, obj, key, expected, val, indexer_sli): + if not isinstance(key, slice): + return + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + self.check_indexer(obj, key, expected, val, indexer_sli) + + ilkey = list(range(len(obj)))[key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli) + + def 
test_mask_key(self, obj, key, expected, val, indexer_sli): + # setitem with boolean mask + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + self.check_indexer(obj, mask, expected, val, indexer_sli) + + def test_series_where(self, obj, key, expected, val): + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + orig = obj + obj = obj.copy() + arr = obj._values + res = obj.where(~mask, val) + tm.assert_series_equal(res, expected) + + tm.assert_equal(arr, orig._values) # original array is unchanged + + def test_index_where(self, obj, key, expected, val, request): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + if obj.dtype == bool and not mask.all(): + # When mask is all True, casting behavior does not apply + msg = "Index/Series casting behavior inconsistent GH#38692" + mark = pytest.mark.xfail(reason=msg) + request.node.add_marker(mark) + + res = Index(obj).where(~mask, val) + tm.assert_index_equal(res, Index(expected)) + + @pytest.mark.xfail( + np_version_under1p20, + reason="Index/Series casting behavior inconsistent GH#38692", + ) + def test_index_putmask(self, obj, key, expected, val): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).putmask(mask, val) + tm.assert_index_equal(res, Index(expected))