Skip to content

BUG: Fix inserting of wrong-dtyped NaT, closes #27297 #27311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jul 22, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
ee426ab
BUG: fix+test setitem datetime64(NaT) into Series[timedelta64]
jbrockmendel Jul 9, 2019
511650c
BUG: fix+test setting datetime64(NaT) in TimedeltaArray
jbrockmendel Jul 9, 2019
3c916c1
TST: inserting timedelta64(NaT) into DatetimeArray, fixed in previous…
jbrockmendel Jul 9, 2019
d39503d
BUG: fix+test assigning timedelta64(NaT) to datetime series
jbrockmendel Jul 9, 2019
b047cce
BUG: fix+test assignment of wrong nat to DataFrame
jbrockmendel Jul 9, 2019
7399ef0
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 10, 2019
5e0004c
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 11, 2019
7721404
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 15, 2019
217ce63
pre-rebase
jbrockmendel Jul 15, 2019
bca4e2a
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 15, 2019
d32c1bf
typo
jbrockmendel Jul 15, 2019
715002f
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 17, 2019
c001aca
update test for datetime series
jbrockmendel Jul 17, 2019
49eec2e
fixups
jbrockmendel Jul 17, 2019
96bda53
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 19, 2019
42183e9
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 20, 2019
7c260d5
remove apparently unnecessary reshape
jbrockmendel Jul 21, 2019
b035e4e
Merge branch 'master' of https://github.com/pandas-dev/pandas into wr…
jbrockmendel Jul 21, 2019
fbc1836
whatsnew
jbrockmendel Jul 22, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,9 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
elif value is None or value != value:
return NPY_NAT
if not util.is_timedelta64_object(value):
# exclude np.timedelta64("NaT")
return NPY_NAT
elif isinstance(value, str):
return Timestamp(value).value
raise ValueError("cannot set a Timestamp with a non-timestamp")
Expand All @@ -545,7 +547,9 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, timedelta):
return Timedelta(value).value
elif value is None or value != value:
return NPY_NAT
if not util.is_datetime64_object(value):
# exclude np.datetime64("NaT")
return NPY_NAT
elif isinstance(value, str):
return Timedelta(value).value
raise ValueError("cannot set a Timedelta with a non-timedelta")
Expand Down
26 changes: 25 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def __setitem__(
elif isinstance(value, self._scalar_type):
self._check_compatible_with(value)
value = self._unbox_scalar(value)
elif isna(value) or value == iNaT:
elif is_valid_na(value, self.dtype) or value == iNaT:
value = iNaT
else:
msg = (
Expand Down Expand Up @@ -1518,6 +1518,30 @@ def mean(self, skipna=True):
return self._box_func(result)


def is_valid_na(obj, dtype):
"""
isna check that excludes incompatible dtypes

Parameters
----------
obj : object
dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype

Returns
-------
bool
"""
if not isna(obj):
return False
if dtype.kind == "M":
return not isinstance(obj, np.timedelta64)
if dtype.kind == "m":
return not isinstance(obj, np.datetime64)

# must be PeriodDType
return not isinstance(obj, (np.datetime64, np.timedelta64))


# -------------------------------------------------------------------
# Shared Constructor Helpers

Expand Down
14 changes: 10 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2295,7 +2295,8 @@ def _try_coerce_args(self, other):
-------
base-type other
"""
if is_null_datetimelike(other):
if is_null_datetimelike(other) and not isinstance(other, np.timedelta64):
# exclude np.timedelta64("NaT")
other = tslibs.iNaT
elif isinstance(other, (datetime, np.datetime64, date)):
other = self._box_func(other)
Expand Down Expand Up @@ -2485,7 +2486,8 @@ def _try_coerce_args(self, other):
# add the tz back
other = self._holder(other, dtype=self.dtype)

elif is_null_datetimelike(other):
if is_null_datetimelike(other) and not isinstance(other, np.timedelta64):
# exclude np.timedelta64("NaT")
other = tslibs.iNaT
elif isinstance(other, self._holder):
if other.tz != self.values.tz:
Expand Down Expand Up @@ -2589,8 +2591,11 @@ def setitem(self, indexer, value):
try:
return super().setitem(indexer, value)
except (ValueError, TypeError):
obj_vals = self.values.astype(object)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you really need to do this?

if you actually do, then .reshape(1, -1) should just work (or use np.atleast_2d)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this really is needed.

reshape(1, -1) is what is already here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you give an example that specific needs this 2d reshape?

this just is a completely different pattern than anywhere else. Why for example would the ObjectBlock construct not simply reshape (if needed); it already knows the dim & the values shape?

if self.ndim == 2 and obj_vals.ndim == 1:
obj_vals = obj_vals.reshape(1, -1)
newb = make_block(
self.values.astype(object), placement=self.mgr_locs, klass=ObjectBlock
obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim
)
return newb.setitem(indexer, value)

Expand Down Expand Up @@ -2665,7 +2670,8 @@ def _try_coerce_args(self, other):
base-type other
"""

if is_null_datetimelike(other):
if is_null_datetimelike(other) and not isinstance(other, np.datetime64):
# exclude np.datetime64("NaT")
other = tslibs.iNaT
elif isinstance(other, (timedelta, np.timedelta64)):
other = Timedelta(other).value
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,8 @@ def setitem(key, value):
pass
elif is_timedelta64_dtype(self.dtype):
# reassign a null value to iNaT
if isna(value):
if isna(value) and not isinstance(value, np.datetime64):
# exclude np.datetime64("NaT")
value = iNaT

try:
Expand Down
139 changes: 139 additions & 0 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,145 @@ def test_timedelta_assignment():
tm.assert_series_equal(s, expected)


def test_timedelta_nat_assignment_series():
tdi = pd.timedelta_range("1s", periods=3, freq="s")
base = pd.Series(tdi)
expected = pd.Series([pd.NaT, tdi[1], tdi[2]], dtype="m8[ns]")

casting_nas = [pd.NaT, np.timedelta64("NaT", "ns")]
for nat in casting_nas:
ser = base.copy(deep=True)
ser[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.loc[0] = nat
tm.assert_series_equal(ser, expected)

df = base.copy(deep=True).to_frame()
df.loc[0, 0] = nat
tm.assert_frame_equal(df, expected.to_frame())

df = base.copy(deep=True).to_frame()
df.iloc[0, 0] = nat
tm.assert_frame_equal(df, expected.to_frame())

# a specifically-datetime NaT should not be coerced to timedelta
expected = expected.astype(object)
non_casting_nas = [np.datetime64("NaT", "ns")]
for nat in non_casting_nas:
ser = base.copy(deep=True)
ser[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.loc[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.iloc[0] = nat
tm.assert_series_equal(ser, expected)

df = base.copy(deep=True).to_frame()
df.loc[0, 0] = nat
tm.assert_frame_equal(df, pd.DataFrame(expected, dtype=object))

df = base.copy(deep=True).to_frame()
df.iloc[0, 0] = nat
tm.assert_frame_equal(df, pd.DataFrame(expected, dtype=object))


@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_datetime_nat_assignment_series(tz):
dti = pd.date_range("2016-01-01", periods=3, tz=tz)
base = pd.Series(dti)
expected = pd.Series([pd.NaT, dti[1], dti[2]], dtype=dti.dtype)

casting_nas = [pd.NaT, np.datetime64("NaT", "ns")]
for nat in casting_nas:
ser = base.copy(deep=True)
ser[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.loc[0] = nat
tm.assert_series_equal(ser, expected)

df = base.copy(deep=True).to_frame()
df.loc[0, 0] = nat
tm.assert_frame_equal(df, expected.to_frame())

df = base.copy(deep=True).to_frame()
df.iloc[0, 0] = nat
tm.assert_frame_equal(df, expected.to_frame())

# a specifically-timedelta NaT should not be coerced to datetime
expected = expected.astype(object)
non_casting_nas = [np.timedelta64("NaT", "ns")]
for nat in non_casting_nas:
expected[0] = nat
assert expected[0] is nat

ser = base.copy(deep=True)
ser[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.loc[0] = nat
tm.assert_series_equal(ser, expected)

ser = base.copy(deep=True)
ser.iloc[0] = nat
tm.assert_series_equal(ser, expected)

df = base.copy(deep=True).to_frame()
df.loc[0, 0] = nat
tm.assert_frame_equal(df, pd.DataFrame(expected, dtype=object))

df = base.copy(deep=True).to_frame()
df.iloc[0, 0] = nat
tm.assert_frame_equal(df, pd.DataFrame(expected, dtype=object))


def test_timedelta_nat_assignment_array():
tdi = pd.timedelta_range("1s", periods=3, freq="s")
tda = tdi._data
expected = pd.TimedeltaIndex([pd.NaT, tdi[1], tdi[2]])._data

casting_nas = [pd.NaT, np.timedelta64("NaT", "ns")]
for nat in casting_nas:
arr = tda.copy()
arr[0] = nat

tm.assert_equal(arr, expected)

non_casting_nas = [np.datetime64("NaT", "ns")]
for nat in non_casting_nas:
arr = tda.copy()
with pytest.raises(TypeError):
arr[0] = nat


@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_datetime_nat_assignment_array(tz):
dti = pd.date_range("2016-01-01", periods=3, tz=tz)
dta = dti._data
expected = pd.DatetimeIndex([pd.NaT, dti[1], dti[2]])._data

casting_nas = [pd.NaT, np.datetime64("NaT", "ns")]
for nat in casting_nas:
arr = dta.copy()
arr[0] = nat

tm.assert_equal(arr, expected)

non_casting_nas = [np.timedelta64("NaT", "ns")]
for nat in non_casting_nas:
arr = dta.copy()
with pytest.raises(TypeError):
arr[0] = nat


def test_underlying_data_conversion():
# GH 4080
df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
Expand Down