From 6bcec8a8b1c4b1fce46595c27a1eb6a97ba01e10 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Jan 2022 13:41:06 -0800 Subject: [PATCH 1/3] BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace --- pandas/core/internals/blocks.py | 10 +------ pandas/tests/extension/base/setitem.py | 3 ++ pandas/tests/frame/test_constructors.py | 39 ++++++++++++++----------- pandas/tests/indexing/test_iloc.py | 8 +++-- 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d8543a15b1ea0..aa079ce18f8a2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1526,17 +1526,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]): return self.values def set_inplace(self, locs, values) -> None: - # NB: This is a misnomer, is supposed to be inplace but is not, - # see GH#33457 # When an ndarray, we should have locs.tolist() == [0] # When a BlockPlacement we should have list(locs) == [0] - self.values = values - try: - # TODO(GH33457) this can be removed - self._cache.clear() - except AttributeError: - # _cache not yet initialized - pass + self.values[:] = values def _maybe_squeeze_arg(self, arg): """ diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 208a1a1757be2..c244e30141790 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -380,6 +380,7 @@ def test_setitem_frame_2d_values(self, data, request): request.node.add_marker(mark) df = pd.DataFrame({"A": data}) + blk_data = df._mgr.arrays[0] orig = df.copy() df.iloc[:] = df @@ -390,6 +391,8 @@ def test_setitem_frame_2d_values(self, data, request): df.iloc[:] = df.values self.assert_frame_equal(df, orig) + # GH#33457 Check that this setting occurred in-place + assert df._mgr.arrays[0] is blk_data df.iloc[:-1] = df.values[:-1] self.assert_frame_equal(df, orig) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b84092338e426..276d0d2666aa8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2525,6 +2525,7 @@ def test_dict_nocopy( return c = pd.array([1, 2], dtype=any_numeric_ea_dtype) + c_orig = c.copy() df = DataFrame({"a": a, "b": b, "c": c}, copy=copy) def get_base(obj): @@ -2536,9 +2537,19 @@ def get_base(obj): else: raise TypeError - def check_views(): + def check_views(c_only: bool = False): # written to work for either BlockManager or ArrayManager + + # Check that the underlying data behind df["c"] is still `c` + # after setting with iloc. Since we don't know which entry in + # df._mgr.arrays corresponds to df["c"], we just check that exactly + # one of these arrays is `c`. GH#38939 assert sum(x is c for x in df._mgr.arrays) == 1 + if c_only: + # If we ever stop consolidating in setitem_with_indexer, + # this will become unnecessary. + return + assert ( sum( get_base(x) is a @@ -2560,23 +2571,18 @@ def check_views(): # constructor preserves views check_views() + # TODO: most of the rest of this test belongs in indexing tests df.iloc[0, 0] = 0 df.iloc[0, 1] = 0 if not copy: - # Check that the underlying data behind df["c"] is still `c` - # after setting with iloc. Since we don't know which entry in - # df._mgr.arrays corresponds to df["c"], we just check that exactly - # one of these arrays is `c`. GH#38939 - assert sum(x is c for x in df._mgr.arrays) == 1 - # TODO: we can call check_views if we stop consolidating - # in setitem_with_indexer + check_views(True) # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve # view, so we have to check in the other direction - # df.iloc[0, 2] = 0 - # if not copy: - # check_views() - c[0] = 0 + df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) + assert df.dtypes.iloc[2] == c.dtype + if not copy: + check_views(True) if copy: if a.dtype.kind == "M": @@ -2586,14 +2592,13 @@ def check_views(): assert a[0] == a.dtype.type(1) assert b[0] == b.dtype.type(3) # FIXME(GH#35417): enable after GH#35417 - # assert c[0] == 1 - assert df.iloc[0, 2] == 1 + assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c else: # TODO: we can call check_views if we stop consolidating # in setitem_with_indexer - # FIXME(GH#35417): enable after GH#35417 - # assert b[0] == 0 - assert df.iloc[0, 2] == 0 + assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c + # TODO: we can check b[0] == 0 if we stop consolidating in + # setitem_with_indexer (except for datetimelike?) def test_from_series_with_name_with_columns(self): # GH 7893 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index a3b876089994b..3d53f1e23f332 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -880,16 +880,18 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 - @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = Categorical(["A", "B", "C"]) - df = DataFrame({1: cat, 2: [1, 2, 3]}) + df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False) + + assert tm.shares_memory(df[1], cat) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] + assert tm.shares_memory(df[1], cat) - expected = Categorical(["C", "B", "A"]) + expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"]) tm.assert_categorical_equal(cat, expected) def test_iloc_with_boolean_operation(self): From f968be55cb5f04a46530019db01f323ab31da25c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Jan 2022 15:22:28 -0800 Subject: [PATCH 2/3] arraymanager compat --- pandas/tests/extension/base/setitem.py | 6 ++++-- pandas/tests/indexing/test_iloc.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index c244e30141790..68162bfe9c1fb 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -391,8 +391,10 @@ def test_setitem_frame_2d_values(self, data, request): df.iloc[:] = df.values self.assert_frame_equal(df, orig) - # GH#33457 Check that this setting occurred in-place - assert df._mgr.arrays[0] is blk_data + if not using_array_manager: + # GH#33457 Check that this setting occurred in-place + # FIXME(ArrayManager): this should work there too + assert df._mgr.arrays[0] is blk_data df.iloc[:-1] = df.values[:-1] self.assert_frame_equal(df, orig) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 3d53f1e23f332..38e59ceeef222 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -880,6 +880,7 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 + @td.skip_array_manager_not_yet_implemented def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = Categorical(["A", "B", "C"]) From 57b668cf44f823208955f2216c53831ae816c0f4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Jan 2022 10:48:23 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9a6455d4d012f..c37fd15a9fa1b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -157,6 +157,7 @@ Indexing - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) - Bug when setting an integer too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`) +- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`) - Missing