Skip to content

BUG: setting td64 value into numeric Series incorrectly casting to int #39488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 3, 2021
10 changes: 9 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4331,7 +4331,15 @@ def where(self, cond, other=None):
except (ValueError, TypeError):
return self.astype(object).where(cond, other)

values = np.where(cond, values, other)
if isinstance(other, np.timedelta64) and self.dtype == object:
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too bad you cannot use np.where and then cast the integers back if necessary.

other = [other] * (~cond).sum()
values = cast(np.ndarray, values).copy()
# error: Unsupported target for indexed assignment ("ArrayLike")
values[~cond] = other # type:ignore[index]
else:
values = np.where(cond, values, other)

return Index(values, name=self.name)

Expand Down
25 changes: 18 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
infer_dtype_from,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_promote,
maybe_upcast,
soft_convert_objects,
)
Expand Down Expand Up @@ -1031,6 +1030,12 @@ def putmask(self, mask, new) -> List[Block]:
elif not mask.any():
return [self]

elif isinstance(new, np.timedelta64):
# using putmask with object dtype will incorrectly cast to object
# Having excluded self._can_hold_element, we know we cannot operate
# in-place, so we are safe using `where`
return self.where(new, ~mask)

else:
# may need to upcast
if transpose:
Expand All @@ -1052,7 +1057,7 @@ def f(mask, val, idx):
n = np.array(new)

# type of the new block
dtype, _ = maybe_promote(n.dtype)
dtype = find_common_type([n.dtype, val.dtype])

# we need to explicitly astype here to make a copy
n = n.astype(dtype)
Expand Down Expand Up @@ -1311,12 +1316,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
return self._maybe_downcast(blocks, "infer")

# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)
elif isinstance(other, np.timedelta64):
# expressions.where will cast np.timedelta64 to int
result = self.values.copy()
result[~cond] = [other] * (~cond).sum()

else:
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
result = expressions.where(cond, values, other)
# By the time we get here, we should have all Series/Index
# args extracted to ndarray
result = expressions.where(cond, values, other)

if self._can_hold_na or self.ndim == 1:

Expand Down
146 changes: 125 additions & 21 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas.compat import np_version_under1p20

from pandas import (
DatetimeIndex,
Index,
Expand Down Expand Up @@ -516,25 +518,127 @@ def test_setitem_slice_into_readonly_backing_data():
assert not array.any()


@pytest.mark.parametrize(
"key", [0, slice(0, 1), [0], np.array([0]), range(1)], ids=type
)
@pytest.mark.parametrize("dtype", [complex, int, float])
def test_setitem_td64_into_complex(key, dtype, indexer_sli):
# timedelta64 should not be treated as integers
arr = np.arange(5).astype(dtype)
ser = Series(arr)
td = np.timedelta64(4, "ns")

indexer_sli(ser)[key] = td
assert ser.dtype == object
assert arr[0] == 0 # original array is unchanged

if not isinstance(key, int) and not (
indexer_sli is tm.loc and isinstance(key, slice)
):
# skip key/indexer_sli combinations that will have mismatched lengths
class TestSetitemCastingEquivalentsTimedelta64IntoNumeric:
# timedelta64 should not be treated as integers when setting into
# numeric Series

@pytest.fixture
def val(self):
td = np.timedelta64(4, "ns")
return td
return np.full((1,), td)

@pytest.fixture(params=[complex, int, float])
def dtype(self, request):
return request.param

@pytest.fixture
def obj(self, dtype):
arr = np.arange(5).astype(dtype)
ser = Series(arr)
return ser

@pytest.fixture
def expected(self, dtype):
arr = np.arange(5).astype(dtype)
ser = Series(arr)
indexer_sli(ser)[key] = np.full((1,), td)
assert ser.dtype == object
assert arr[0] == 0 # original array is unchanged
ser = ser.astype(object)
ser.values[0] = np.timedelta64(4, "ns")
return ser

@pytest.fixture
def key(self):
return 0

def check_indexer(self, obj, key, expected, val, indexer):
orig = obj
obj = obj.copy()
arr = obj._values

indexer(obj)[key] = val
tm.assert_series_equal(obj, expected)

tm.assert_equal(arr, orig._values) # original array is unchanged

def test_int_key(self, obj, key, expected, val, indexer_sli):
if not isinstance(key, int):
return

self.check_indexer(obj, key, expected, val, indexer_sli)

rng = range(key, key + 1)
self.check_indexer(obj, rng, expected, val, indexer_sli)

if indexer_sli is not tm.loc:
# Note: no .loc because that handles slice edges differently
slc = slice(key, key + 1)
self.check_indexer(obj, slc, expected, val, indexer_sli)

ilkey = [key]
self.check_indexer(obj, ilkey, expected, val, indexer_sli)

indkey = np.array(ilkey)
self.check_indexer(obj, indkey, expected, val, indexer_sli)

def test_slice_key(self, obj, key, expected, val, indexer_sli):
if not isinstance(key, slice):
return

if indexer_sli is not tm.loc:
# Note: no .loc because that handles slice edges differently
self.check_indexer(obj, key, expected, val, indexer_sli)

ilkey = list(range(len(obj)))[key]
self.check_indexer(obj, ilkey, expected, val, indexer_sli)

indkey = np.array(ilkey)
self.check_indexer(obj, indkey, expected, val, indexer_sli)

def test_mask_key(self, obj, key, expected, val, indexer_sli):
# setitem with boolean mask
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

self.check_indexer(obj, mask, expected, val, indexer_sli)

def test_series_where(self, obj, key, expected, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

orig = obj
obj = obj.copy()
arr = obj._values
res = obj.where(~mask, val)
tm.assert_series_equal(res, expected)

tm.assert_equal(arr, orig._values) # original array is unchanged

def test_index_where(self, obj, key, expected, val, request):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if obj.dtype == bool and not mask.all():
# When mask is all True, casting behavior does not apply
msg = "Index/Series casting behavior inconsistent GH#38692"
mark = pytest.mark.xfail(reason=msg)
request.node.add_marker(mark)

res = Index(obj).where(~mask, val)
tm.assert_index_equal(res, Index(expected))

@pytest.mark.xfail(
np_version_under1p20,
reason="Index/Series casting behavior inconsistent GH#38692",
)
def test_index_putmask(self, obj, key, expected, val):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

res = Index(obj).putmask(mask, val)
tm.assert_index_equal(res, Index(expected))