diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3f0744abd1d59..9ad53cd189348 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -535,6 +535,7 @@ Indexing - Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`) - Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`) - Bug in :meth:`DataFrame.__setitem__` incorrectly writing into an existing column's array rather than setting a new array when the new dtype and the old dtype match (:issue:`43406`) +- Bug in setting floating-dtype values into a :class:`Series` with integer dtype failing to set inplace when those values can be losslessly converted to integers (:issue:`44316`) - Bug in :meth:`Series.__setitem__` with object dtype when setting an array with matching size and dtype='datetime64[ns]' or dtype='timedelta64[ns]' incorrectly converting the datetime/timedeltas to integers (:issue:`43868`) - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 261359767cf60..432074a8dd699 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2205,6 +2205,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: if tipo.kind not in ["i", "u"]: if is_float(element) and element.is_integer(): return True + + if isinstance(element, np.ndarray) and element.dtype.kind == "f": + # If all can be losslessly cast to integers, then we can hold them + # We do something similar in putmask_smart + casted = element.astype(dtype) + comp = casted == element + return comp.all() + # Anything other than 
integer we cannot hold return False elif dtype.itemsize < tipo.itemsize: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 40aa70a2ada2f..4d8c411478993 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -425,6 +425,18 @@ def asi8(self) -> npt.NDArray[np.int64]: ) return self._values.view(self._default_dtype) + def _validate_fill_value(self, value): + # e.g. np.array([1.0]) we want np.array([1], dtype=self.dtype) + # see TestSetitemFloatNDarrayIntoIntegerSeries + super()._validate_fill_value(value) + if hasattr(value, "dtype") and is_float_dtype(value.dtype): + converted = value.astype(self.dtype) + if (converted == value).all(): + # See also: can_hold_element + return converted + raise TypeError + return value + class Int64Index(IntegerIndex): _index_descr_args = { diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 33c78f396b80b..2589015e0f0b1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1193,6 +1193,14 @@ def where(self, other, cond) -> list[Block]: values, icond.sum(), other # type: ignore[arg-type] ) if alt is not other: + if is_list_like(other) and len(other) < len(values): + # call np.where with other to get the appropriate ValueError + np.where(~icond, values, other) + raise NotImplementedError( + "This should not be reached; call to np.where above is " + "expected to raise ValueError. 
Please report a bug at " + "github.com/pandas-dev/pandas" + ) result = values.copy() np.putmask(result, icond, alt) else: diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index c4776f2a1e143..3a486f795f23e 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -40,3 +40,16 @@ def test_can_hold_element_range(any_int_numpy_dtype): rng = range(10 ** 10, 10 ** 10) assert len(rng) == 0 assert can_hold_element(arr, rng) + + +def test_can_hold_element_int_values_float_ndarray(): + arr = np.array([], dtype=np.int64) + + element = np.array([1.0, 2.0]) + assert can_hold_element(arr, element) + + assert not can_hold_element(arr, element + 0.5) + + # integer but not losslessly castable to int64 + element = np.array([3, 2 ** 65], dtype=np.float64) + assert not can_hold_element(arr, element) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 5d0aeba4aebbc..b97aaf6c551d8 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -196,16 +196,35 @@ def test_multiindex_assignment(self): df.loc[4, "d"] = arr tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) + def test_multiindex_assignment_single_dtype(self, using_array_manager): + # GH3777 part 2b # single dtype + arr = np.array([0.0, 1.0]) + df = DataFrame( np.random.randint(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], + dtype=np.int64, ) + view = df["c"].iloc[:2].values + # arr can be losslessly cast to int, so this setitem is inplace df.loc[4, "c"] = arr - exp = Series(arr, index=[8, 10], name="c", dtype="float64") - tm.assert_series_equal(df.loc[4, "c"], exp) + exp = Series(arr, index=[8, 10], name="c", dtype="int64") + result = df.loc[4, "c"] + tm.assert_series_equal(result, exp) + if not 
using_array_manager: + # FIXME(ArrayManager): this correctly preserves dtype, + # but incorrectly is not inplace. + # extra check for inplace-ness + tm.assert_numpy_array_equal(view, exp.values) + + # arr + 0.5 cannot be cast losslessly to int, so we upcast + df.loc[4, "c"] = arr + 0.5 + result = df.loc[4, "c"] + exp = exp + 0.5 + tm.assert_series_equal(result, exp) # scalar ok df.loc[4, "c"] = 10 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 046d349b92f3f..d446d606d726f 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -515,9 +515,18 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # but on a DataFrame with multiple blocks df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) + # setting float values that can be held by existing integer arrays + # is inplace df.iloc[:, 0] = df.iloc[:, 0].astype("f8") + if not using_array_manager: + assert len(df._mgr.blocks) == 1 + + # if the assigned values cannot be held by existing integer arrays, + # we cast + df.iloc[:, 0] = df.iloc[:, 0] + 0.5 if not using_array_manager: assert len(df._mgr.blocks) == 2 + expected = df.copy() # assign back to self @@ -892,7 +901,7 @@ def test_iloc_with_boolean_operation(self): tm.assert_frame_equal(result, expected) result.iloc[[False, False, True, True]] /= 2 - expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]]) + expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]]) tm.assert_frame_equal(result, expected) def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 4706025b70db6..ea754127b98e9 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -620,23 +620,27 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli): mask[key] = True obj = obj.copy() + + if 
is_list_like(val) and len(val) < mask.sum(): + msg = "boolean index did not match indexed array along dimension" + with pytest.raises(IndexError, match=msg): + indexer_sli(obj)[mask] = val + return + indexer_sli(obj)[mask] = val tm.assert_series_equal(obj, expected) def test_series_where(self, obj, key, expected, val, is_inplace): - if is_list_like(val) and len(val) < len(obj): - # Series.where is not valid here - if isinstance(val, range): - return - - # FIXME: The remaining TestSetitemDT64IntoInt that go through here - # are relying on technically-incorrect behavior because Block.where - # uses np.putmask instead of expressions.where in those cases, - # which has different length-checking semantics. - mask = np.zeros(obj.shape, dtype=bool) mask[key] = True + if is_list_like(val) and len(val) < len(obj): + # Series.where is not valid here + msg = "operands could not be broadcast together with shapes" + with pytest.raises(ValueError, match=msg): + obj.where(~mask, val) + return + orig = obj obj = obj.copy() arr = obj._values @@ -1014,6 +1018,39 @@ def inplace(self): return True +@pytest.mark.parametrize( + "val", + [ + np.array([2.0, 3.0]), + np.array([2.5, 3.5]), + np.array([2 ** 65, 2 ** 65 + 1], dtype=np.float64), # all ints, but can't cast + ], +) +class TestSetitemFloatNDarrayIntoIntegerSeries(SetitemCastingEquivalents): + @pytest.fixture + def obj(self): + return Series(range(5), dtype=np.int64) + + @pytest.fixture + def key(self): + return slice(0, 2) + + @pytest.fixture + def inplace(self, val): + # NB: this condition is based on currently-hardcoded "val" cases + return val[0] == 2 + + @pytest.fixture + def expected(self, val, inplace): + if inplace: + dtype = np.int64 + else: + dtype = np.float64 + res_values = np.array(range(5), dtype=dtype) + res_values[:2] = val + return Series(res_values) + + def test_setitem_int_as_positional_fallback_deprecation(): # GH#42215 deprecated falling back to positional on __setitem__ with an # int not contained in the index