Skip to content

BUG/API: prohibit dtype-changing IntervalArray.__setitem__ #32782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 10, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,11 @@ def __setitem__(self, key, value):
msg = f"'value' should be an interval type, got {type(value)} instead."
raise TypeError(msg) from err

if needs_float_conversion:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're raising here we can remove the future references that are no longer being used:

if needs_float_conversion:
left = left.astype("float")

if needs_float_conversion:
right = right.astype("float")

raise ValueError("Cannot set float values for integer-backed IntervalArray")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The needs_float_conversion name might be a bit misleading here because it's not triggered when setting arbitrary float values but rather just when np.nan is included, so we're not generically disallowing setting float values. Maybe a more accurate error message would be along the lines "Cannot set float NaN to integer-backed IntervalArray".

FWIW the behavior when setting float values without np.nan included is to truncate the float and just use the integer component, which is consistent with numpy's behavior:

In [1]: import pandas as pd; import numpy as np

In [2]: ia = pd.arrays.IntervalArray.from_breaks(range(5))

In [3]: ia[0] = pd.Interval(0.9, 1.1)

In [4]: ia
Out[4]: 
<IntervalArray>
[(0, 1], (1, 2], (2, 3], (3, 4]]
Length: 4, closed: right, dtype: interval[int64]

In [5]: a = np.arange(4)

In [6]: a[0] = 0.9

In [7]: a[1] = 1.1

In [8]: a
Out[8]: array([0, 1, 2, 3])

I'm not crazy about that behavior but I'm fine with it since it's consistent with numpy.


key = check_array_indexer(self, key)

# Need to ensure that left and right are updated atomically, so we're
# forced to copy, update the copy, and swap in the new values.
left = self.left.copy(deep=True)
Expand Down
13 changes: 10 additions & 3 deletions pandas/tests/arrays/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ class TestSetitem:
def test_set_na(self, left_right_dtypes):
left, right = left_right_dtypes
result = IntervalArray.from_arrays(left, right)

if result.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float values for integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
result[0] = np.NaN
return

result[0] = np.nan

expected_left = Index([left._na_value] + list(left[1:]))
Expand Down Expand Up @@ -182,7 +189,7 @@ def test_arrow_array_missing():
import pyarrow as pa
from pandas.core.arrays._arrow_utils import ArrowIntervalType

arr = IntervalArray.from_breaks([0, 1, 2, 3])
arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
arr[1] = None

result = pa.array(arr)
Expand All @@ -209,8 +216,8 @@ def test_arrow_array_missing():
@pyarrow_skip
@pytest.mark.parametrize(
"breaks",
[[0, 1, 2, 3], pd.date_range("2017", periods=4, freq="D")],
ids=["int", "datetime64[ns]"],
[[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip(breaks):
import pyarrow as pa
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_interval_dtype

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -266,7 +268,12 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict):

# Test that it is a copy
copy = series.copy(deep=True)
ns[ns.notna()] = np.nan
if is_interval_dtype(ns.dtype) and ns.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float values for integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
ns[ns.notna()] = np.nan
else:
ns[ns.notna()] = np.nan

# Make sure original not changed
tm.assert_series_equal(series, copy)
Expand Down