From 03bb1a4b462717d02793e5579edbd57c7365bdb6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 Oct 2021 18:56:55 -0700 Subject: [PATCH 1/3] BUG: Series[int8][:3] = range(3) unnecessary upcasting to int64 --- pandas/core/dtypes/cast.py | 15 +++++++ .../dtypes/cast/test_can_hold_element.py | 41 +++++++++++++++++++ pandas/tests/series/indexing/test_setitem.py | 41 +++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 pandas/tests/dtypes/cast/test_can_hold_element.py diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d8c58d1eaf4c7..4cd6a8e3d3677 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2197,6 +2197,9 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: + if isinstance(element, range): + return _dtype_can_hold_range(element, dtype) + if tipo is not None: if tipo.kind not in ["i", "u"]: if is_float(element) and element.is_integer(): @@ -2209,6 +2212,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs return not element._mask.any() + return True # We have not inferred an integer from the dtype @@ -2249,3 +2253,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: return isinstance(element, bytes) and len(element) <= dtype.itemsize raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py new file mode 100644 index 0000000000000..e61915d595c41 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -0,0 +1,41 @@ +import numpy as np + +from pandas.core.dtypes.cast import can_hold_element + + +def test_can_hold_element_range(any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + arr = np.array([], dtype=dtype) + + rng = range(2, 127) + assert can_hold_element(arr, rng) + + # negatives -> can't be held by uint dtypes + rng = range(-2, 127) + if dtype.kind == "i": + assert can_hold_element(arr, rng) + else: + assert not can_hold_element(arr, rng) + + rng = range(2, 255) + if dtype == "int8": + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + rng = range(-255, 65537) + if dtype.kind == "u": + assert not can_hold_element(arr, rng) + elif dtype.itemsize < 4: + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + # empty + rng = range(-(10 ** 10), -(10 ** 10)) + assert len(rng) == 0 + # assert can_hold_element(arr, rng) + + rng = range(10 ** 10, 10 ** 10) + assert len(rng) == 0 + assert can_hold_element(arr, rng) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5521bee09b19b..d001e6c13d78a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import is_list_like + from pandas import ( Categorical, DataFrame, @@ -622,6 +624,16 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli): tm.assert_series_equal(obj, expected) def test_series_where(self, obj, key, expected, val, is_inplace): + if is_list_like(val) and len(val) < len(obj): + # Series.where is not valid here + if isinstance(val, range): + return + + # FIXME: The remaining TestSetitemDT64IntoInt that go through here + # are relying on technically-incorrect behavior because Block.where + # uses np.putmask instead of expressions.where in those cases, + # which has different length-checking semantics. + mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -973,6 +985,35 @@ def expected(self, obj, val): return Series(idx) +class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): + # Setting a range with sufficiently-small integers into small-itemsize + # integer dtypes should not need to upcast + + @pytest.fixture + def obj(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + ser = Series(range(5), dtype=dtype) + return ser + + @pytest.fixture + def val(self): + return range(2, 4) + + @pytest.fixture + def key(self): + return slice(0, 2) + + @pytest.fixture + def expected(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + exp = Series([2, 3, 2, 3, 4], dtype=dtype) + return exp + + @pytest.fixture + def inplace(self): + return True + + def test_setitem_int_as_positional_fallback_deprecation(): # GH#42215 deprecated falling back to positional on __setitem__ with an # int not contained in the index From 1ff9250d1f2a38c0f67156cec89c5a1c426a169c Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 Oct 2021 18:58:25 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5601048c409e1..6ec3ac4cb6ede 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -535,9 +535,9 @@ Indexing - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) +- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`??`) - - Missing ^^^^^^^ - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`) From d86ac0a8b67175a5431b2ba45f3fdc4ace319c0d Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 Oct 2021 18:59:46 -0700 Subject: [PATCH 3/3] GH refs --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/dtypes/cast/test_can_hold_element.py | 1 + pandas/tests/series/indexing/test_setitem.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 6ec3ac4cb6ede..f692d5cf5233c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -535,7 +535,7 @@ Indexing - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) -- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`??`) +- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`) - Missing diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index e61915d595c41..c4776f2a1e143 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -4,6 +4,7 @@ def test_can_hold_element_range(any_int_numpy_dtype): + # GH#44261 dtype = np.dtype(any_int_numpy_dtype) arr = np.array([], dtype=dtype) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d001e6c13d78a..5f0710dfbb85a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -986,8 +986,8 @@ def expected(self, obj, val): class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): - # Setting a range with sufficiently-small integers into small-itemsize - # integer dtypes should not need to upcast + # GH#44261 Setting a range with sufficiently-small integers into + # small-itemsize integer dtypes should not need to upcast @pytest.fixture def obj(self, any_int_numpy_dtype):