From 588a3ac4516b5b8315b2fa0f903796cd05ebeffd Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Jan 2021 11:09:06 -0800 Subject: [PATCH 1/4] BUG: setting td64 value into Series[numeric] incorretly casts to int --- pandas/core/indexes/base.py | 9 +- pandas/core/internals/blocks.py | 25 ++-- pandas/tests/series/indexing/test_setitem.py | 123 +++++++++++++++++++ 3 files changed, 149 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 205bbcc07fc76..bd019636feffa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4331,7 +4331,14 @@ def where(self, cond, other=None): except (ValueError, TypeError): return self.astype(object).where(cond, other) - values = np.where(cond, values, other) + if isinstance(other, np.timedelta64) and self.dtype == object: + # https://github.com/numpy/numpy/issues/12550 + # timedelta64 will incorrectly cast to int + other = [other] * (~cond).sum() + values = values.copy() + values[~cond] = other + else: + values = np.where(cond, values, other) return Index(values, name=self.name) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index eb8bb0fe90e9a..7aaca36fbbf70 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -29,7 +29,6 @@ infer_dtype_from, maybe_downcast_numeric, maybe_downcast_to_dtype, - maybe_promote, maybe_upcast, soft_convert_objects, ) @@ -1031,6 +1030,12 @@ def putmask(self, mask, new) -> List[Block]: elif not mask.any(): return [self] + elif isinstance(new, np.timedelta64): + # using putmask with object dtype will incorrect cast to object + # Having excluded self._can_hold_element, we know we cannot operate + # in-place, so we are safe using `where` + return self.where(new, ~mask) + else: # may need to upcast if transpose: @@ -1052,7 +1057,7 @@ def f(mask, val, idx): n = np.array(new) # type of the new block - dtype, _ = maybe_promote(n.dtype) + dtype = find_common_type([n.dtype, val.dtype]) # we need to explicitly astype here to make a copy n = n.astype(dtype) @@ -1309,12 +1314,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: blocks = block.where(orig_other, cond, errors=errors, axis=axis) return self._maybe_downcast(blocks, "infer") - # convert datetime to datetime64, timedelta to timedelta64 - other = convert_scalar_for_putitemlike(other, values.dtype) + elif isinstance(other, np.timedelta64): + # expressions.where will cast np.timedelta64 to int + result = self.values.copy() + result[~cond] = [other] * (~cond).sum() + + else: + # convert datetime to datetime64, timedelta to timedelta64 + other = convert_scalar_for_putitemlike(other, values.dtype) - # By the time we get here, we should have all Series/Index - # args extracted to ndarray - result = expressions.where(cond, values, other) + # By the time we get here, we should have all Series/Index + # args extracted to ndarray + result = expressions.where(cond, values, other) if self._can_hold_na or self.ndim == 1: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 36cd6b0327ccd..7627980fb5554 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -494,3 +494,126 @@ def test_setitem_td64_into_complex(key, dtype, indexer_sli): indexer_sli(ser)[key] = np.full((1,), td) assert ser.dtype == object assert arr[0] == 0 # original array is unchanged + + +class TestSetitemCastingEquivalentsTimedelta64IntoNumeric: + # timedelta64 should not be treated as integers when setting into + # numeric Series + + @pytest.fixture + def val(self): + td = np.timedelta64(4, "ns") + return td + return np.full((1,), td) + + @pytest.fixture(params=[complex, int, float]) + def dtype(self, request): + return request.param + + @pytest.fixture + def obj(self, dtype): + arr = np.arange(5).astype(dtype) + ser = Series(arr) + return ser + + @pytest.fixture + def expected(self, dtype): + arr = np.arange(5).astype(dtype) + ser = Series(arr) + ser = ser.astype(object) + ser.values[0] = np.timedelta64(4, "ns") + return ser + + @pytest.fixture + def key(self): + return 0 + + def check_indexer(self, obj, key, expected, val, indexer): + orig = obj + obj = obj.copy() + arr = obj._values + + indexer(obj)[key] = val + tm.assert_series_equal(obj, expected) + + tm.assert_equal(arr, orig._values) # original array is unchanged + + def test_int_key(self, obj, key, expected, val, indexer_sli): + if not isinstance(key, int): + return + + self.check_indexer(obj, key, expected, val, indexer_sli) + + rng = range(key, key + 1) + self.check_indexer(obj, rng, expected, val, indexer_sli) + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + slc = slice(key, key + 1) + self.check_indexer(obj, slc, expected, val, indexer_sli) + + ilkey = [key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli) + + def test_slice_key(self, obj, key, expected, val, indexer_sli): + if not isinstance(key, slice): + return + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + self.check_indexer(obj, key, expected, val, indexer_sli) + + ilkey = list(range(len(obj)))[key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli) + + def test_mask_key(self, obj, key, expected, val, indexer_sli): + # setitem with boolean mask + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + self.check_indexer(obj, mask, expected, val, indexer_sli) + + def test_series_where(self, obj, key, expected, val): + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + orig = obj + obj = obj.copy() + arr = obj._values + res = obj.where(~mask, val) + tm.assert_series_equal(res, expected) + + tm.assert_equal(arr, orig._values) # original array is unchanged + + def test_index_where(self, obj, key, expected, val, request): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + if obj.dtype == bool and not mask.all(): + # When mask is all True, casting behavior does not apply + msg = "Index/Series casting behavior inconsistent GH#38692" + mark = pytest.mark.xfail(reason=msg) + request.node.add_marker(mark) + + res = Index(obj).where(~mask, val) + tm.assert_index_equal(res, Index(expected)) + + @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692") + def test_index_putmask(self, obj, key, expected, val): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).putmask(mask, val) + tm.assert_index_equal(res, Index(expected)) From 39f9d09fd227b1fcaea22681fb668237c06316b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Jan 2021 11:11:27 -0800 Subject: [PATCH 2/4] remove now-duplicate test --- pandas/tests/series/indexing/test_setitem.py | 24 -------------------- 1 file changed, 24 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7627980fb5554..159860ad5c8f0 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -472,30 +472,6 @@ def test_setitem_slice_into_readonly_backing_data(): assert not array.any() -@pytest.mark.parametrize( - "key", [0, slice(0, 1), [0], np.array([0]), range(1)], ids=type -) -@pytest.mark.parametrize("dtype", [complex, int, float]) -def test_setitem_td64_into_complex(key, dtype, indexer_sli): - # timedelta64 should not be treated as integers - arr = np.arange(5).astype(dtype) - ser = Series(arr) - td = np.timedelta64(4, "ns") - - indexer_sli(ser)[key] = td - assert ser.dtype == object - assert arr[0] == 0 # original array is unchanged - - if not isinstance(key, int) and not ( - indexer_sli is tm.loc and isinstance(key, slice) - ): - # skip key/indexer_sli combinations that will have mismatched lengths - ser = Series(arr) - indexer_sli(ser)[key] = np.full((1,), td) - assert ser.dtype == object - assert arr[0] == 0 # original array is unchanged - - class TestSetitemCastingEquivalentsTimedelta64IntoNumeric: # timedelta64 should not be treated as integers when setting into # numeric Series From 970353c212c22ae911428852662c7951d525336e Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Jan 2021 12:37:36 -0800 Subject: [PATCH 3/4] mypy fixup --- pandas/core/indexes/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bd019636feffa..5303d337c8cd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4335,8 +4335,9 @@ def where(self, cond, other=None): # https://github.com/numpy/numpy/issues/12550 # timedelta64 will incorrectly cast to int other = [other] * (~cond).sum() - values = values.copy() - values[~cond] = other + values = cast(np.ndarray, values).copy() + # error: Unsupported target for indexed assignment ("ArrayLike") + values[~cond] = other # type:ignore[index] else: values = np.where(cond, values, other) From 6cf396ac2feaf73ae2f5594ae4474ca0191e23e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Jan 2021 14:11:57 -0800 Subject: [PATCH 4/4] np 1.20 compat --- pandas/tests/indexing/test_loc.py | 4 ++-- pandas/tests/series/indexing/test_setitem.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b6e4c38ef7f0e..87b04438c3d34 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import is_numpy_dev +from pandas.compat import np_version_under1p20 import pandas.util._test_decorators as td import pandas as pd @@ -978,7 +978,7 @@ def test_loc_setitem_empty_append_single_value(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) - @pytest.mark.xfail(is_numpy_dev, reason="gh-35481") + @pytest.mark.xfail(not np_version_under1p20, reason="gh-35481") def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 041c1af874398..3fe4579551282 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.compat import np_version_under1p20 + from pandas import ( DatetimeIndex, Index, @@ -611,7 +613,10 @@ def test_index_where(self, obj, key, expected, val, request): res = Index(obj).where(~mask, val) tm.assert_index_equal(res, Index(expected)) - @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692") + @pytest.mark.xfail( + np_version_under1p20, + reason="Index/Series casting behavior inconsistent GH#38692", + ) def test_index_putmask(self, obj, key, expected, val): if Index(obj).dtype != obj.dtype: pytest.skip("test not applicable for this dtype")