diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2a718fdcf16e7..0430db0c9dda7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -537,7 +537,8 @@ Indexing - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) - Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`) - +- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`) +- Missing ^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c0ac9098ec7fc..8be4fc13ed991 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2197,6 +2197,9 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: + if isinstance(element, range): + return _dtype_can_hold_range(element, dtype) + if tipo is not None: if tipo.kind not in ["i", "u"]: if is_float(element) and element.is_integer(): @@ -2209,6 +2212,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs return not element._mask.any() + return True # We have not inferred an integer from the dtype @@ -2249,3 +2253,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: return isinstance(element, bytes) and len(element) <= dtype.itemsize raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py new file mode 100644 index 0000000000000..c4776f2a1e143 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -0,0 +1,42 @@ +import numpy as np + +from pandas.core.dtypes.cast import can_hold_element + + +def test_can_hold_element_range(any_int_numpy_dtype): + # GH#44261 + dtype = np.dtype(any_int_numpy_dtype) + arr = np.array([], dtype=dtype) + + rng = range(2, 127) + assert can_hold_element(arr, rng) + + # negatives -> can't be held by uint dtypes + rng = range(-2, 127) + if dtype.kind == "i": + assert can_hold_element(arr, rng) + else: + assert not can_hold_element(arr, rng) + + rng = range(2, 255) + if dtype == "int8": + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + rng = range(-255, 65537) + if dtype.kind == "u": + assert not can_hold_element(arr, rng) + elif dtype.itemsize < 4: + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + # empty + rng = range(-(10 ** 10), -(10 ** 10)) + assert len(rng) == 0 + # assert can_hold_element(arr, rng) + + rng = range(10 ** 10, 10 ** 10) + assert len(rng) == 0 + assert can_hold_element(arr, rng) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5521bee09b19b..5f0710dfbb85a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import is_list_like + from pandas import ( Categorical, DataFrame, @@ -622,6 +624,16 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli): tm.assert_series_equal(obj, expected) def test_series_where(self, obj, key, expected, val, is_inplace): + if is_list_like(val) and len(val) < len(obj): + # Series.where is not valid here + if isinstance(val, range): + return + + # FIXME: The remaining TestSetitemDT64IntoInt that go through here + # are relying on technically-incorrect behavior because Block.where + # uses np.putmask instead of expressions.where in those cases, + # which has different length-checking semantics. + mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -973,6 +985,35 @@ def expected(self, obj, val): return Series(idx) +class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): + # GH#44261 Setting a range with sufficiently-small integers into + # small-itemsize integer dtypes should not need to upcast + + @pytest.fixture + def obj(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + ser = Series(range(5), dtype=dtype) + return ser + + @pytest.fixture + def val(self): + return range(2, 4) + + @pytest.fixture + def key(self): + return slice(0, 2) + + @pytest.fixture + def expected(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + exp = Series([2, 3, 2, 3, 4], dtype=dtype) + return exp + + @pytest.fixture + def inplace(self): + return True + + def test_setitem_int_as_positional_fallback_deprecation(): # GH#42215 deprecated falling back to positional on __setitem__ with an # int not contained in the index