diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1b3be65ee66f2..05e7026b0faa3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -540,6 +540,7 @@ Indexing - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) - Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`) - Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`) +- Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`) - Missing diff --git a/pandas/core/series.py b/pandas/core/series.py index 391169af598c2..02f4810bb1e6b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1096,9 +1096,26 @@ def __setitem__(self, key, value) -> None: if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) key = np.asarray(key, dtype=bool) + + if ( + is_list_like(value) + and len(value) != len(self) + and not isinstance(value, Series) + and not is_object_dtype(self.dtype) + ): + # Series will be reindexed to have matching length inside + # _where call below + # GH#44265 + indexer = key.nonzero()[0] + self._set_values(indexer, value) + return + + # otherwise with listlike other we interpret series[mask] = other + # as series[mask] = other[mask] try: self._where(~key, value, inplace=True) except InvalidIndexError: + # test_where_dups self.iloc[key] = value return diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5f0710dfbb85a..4706025b70db6 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1064,3 +1064,43 @@ def test_setitem_with_bool_indexer(): df.loc[[True, False, False], "a"] = 10 expected = DataFrame({"a": [10, 2, 3]}) tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("size", range(2, 6)) +@pytest.mark.parametrize( + "mask", [[True, False, False, False, False], [True, False], [False]] +) +@pytest.mark.parametrize( + "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min] +) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize( + "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] +) +def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, box): + # GH#44265 + # see also tests.series.indexing.test_where.test_broadcast + + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + ser = Series(data) + + if selection.sum() != 1: + msg = ( + "cannot set using a list-like indexer with a different " + "length than the value" + ) + with pytest.raises(ValueError, match=msg): + # GH#44265 + ser[selection] = box(item) + else: + # In this corner case setting is equivalent to setting with the unboxed + # item + ser[selection] = box(item) + + expected = Series(np.arange(size, dtype=float)) + expected[selection] = item + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index fc9d3a1e1e6ab..88b75164d2f3e 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -88,7 +88,7 @@ def test_where_unsafe(): s = Series(np.arange(10)) mask = s > 5 - msg = "cannot assign mismatch length to masked array" + msg = "cannot set using a list-like indexer with a different length than the value" with pytest.raises(ValueError, match=msg): s[mask] = [5, 4, 3, 2, 1] @@ -161,13 +161,10 @@ def test_where_error(): tm.assert_series_equal(s, expected) # failures - msg = "cannot assign mismatch length to masked array" + msg = "cannot set using a list-like indexer with a different length than the value" with pytest.raises(ValueError, match=msg): s[[True, False]] = [0, 2, 3] - msg = ( - "NumPy boolean array indexing assignment cannot assign 0 input " - "values to the 1 output values where the mask is true" - ) + with pytest.raises(ValueError, match=msg): s[[True, False]] = [] @@ -298,6 +295,7 @@ def test_where_setitem_invalid(): "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] ) def test_broadcast(size, mask, item, box): + # GH#8801, GH#4195 selection = np.resize(mask, size) data = np.arange(size, dtype=float) @@ -309,7 +307,8 @@ def test_broadcast(size, mask, item, box): ) s = Series(data) - s[selection] = box(item) + + s[selection] = item tm.assert_series_equal(s, expected) s = Series(data)