From 2a4dcf596fa2cdbd8cadd22caaaef36705932745 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 30 Nov 2020 21:02:31 -0800 Subject: [PATCH 1/3] Retain views with listlike indexers setitem --- pandas/core/indexing.py | 6 +++++- pandas/core/internals/managers.py | 17 +++++++++++++++-- pandas/tests/frame/indexing/test_setitem.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 28e59df995a83..e7cf8cae28b88 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -672,8 +672,12 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): and not com.is_bool_indexer(key) and all(is_hashable(k) for k in key) ): + # GH#38148 keys = self.obj.columns.union(key, sort=False) - self.obj._mgr = self.obj._mgr.reindex_axis(keys, 0) + + self.obj._mgr = self.obj._mgr.reindex_axis( + keys, axis=0, copy=False, consolidate=False, only_slice=True + ) def __setitem__(self, key, value): if isinstance(key, tuple): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 168dba25ba29c..93ab207d8ce12 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1236,6 +1236,8 @@ def reindex_axis( limit=None, fill_value=None, copy: bool = True, + consolidate: bool = True, + only_slice: bool = False, ): """ Conform block manager to new index. @@ -1246,7 +1248,13 @@ def reindex_axis( ) return self.reindex_indexer( - new_index, indexer, axis=axis, fill_value=fill_value, copy=copy + new_index, + indexer, + axis=axis, + fill_value=fill_value, + copy=copy, + consolidate=consolidate, + only_slice=only_slice, ) def reindex_indexer( @@ -1258,6 +1266,7 @@ def reindex_indexer( allow_dups: bool = False, copy: bool = True, consolidate: bool = True, + only_slice: bool = False, ) -> T: """ Parameters @@ -1270,6 +1279,8 @@ def reindex_indexer( copy : bool, default True consolidate: bool, default True Whether to consolidate inplace before reindexing. + only_slice : bool, default False + Whether to take views, not copies, along columns. pandas-indexer with -1's only. """ @@ -1293,7 +1304,9 @@ def reindex_indexer( raise IndexError("Requested axis not found in manager") if axis == 0: - new_blocks = self._slice_take_blocks_ax0(indexer, fill_value=fill_value) + new_blocks = self._slice_take_blocks_ax0( + indexer, fill_value=fill_value, only_slice=only_slice + ) else: new_blocks = [ blk.take_nd( diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e4a66ea9133dd..884cb6c20b77e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -319,6 +319,24 @@ def test_setitem_bool_with_numeric_index(self, dtype): tm.assert_index_equal(df.columns, expected_cols) +class TestDataFrameSetItemWithExpansion: + def test_setitem_listlike_views(self): + # GH#38148 + df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) + + # get one column as a view of df + ser = df["a"] + + # add columns with list-like indexer + df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]]) + + # edit in place the first column to check view semantics + df.iloc[0, 0] = 100 + + expected = Series([100, 2, 3], name="a") + tm.assert_series_equal(ser, expected) + + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): # GH#31469 From 2235f758873bb554223baf603e3920de4e2ff2ef Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Dec 2020 09:29:51 -0800 Subject: [PATCH 2/3] port whatsnew, asv from #38148 --- asv_bench/benchmarks/indexing.py | 8 ++++++++ doc/source/whatsnew/v1.1.5.rst | 1 + 2 files changed, 9 insertions(+) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 74e0a3a434cde..4fd91c8aafe4b 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -358,6 +358,14 @@ def time_assign_with_setitem(self): for i in range(100): self.df[i] = np.random.randn(self.N) + def time_assign_list_like_with_setitem(self): + np.random.seed(1234) + self.df[list(range(100))] = np.random.randn(self.N, 100) + + def time_assign_list_of_columns_concat(self): + df = DataFrame(np.random.randn(self.N, 100)) + concat([self.df, df], axis=1) + class ChainIndexing: diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 0e2e510147603..da3008f41bc1b 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -25,6 +25,7 @@ Fixed regressions - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) +- Fixed performance regression for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) .. --------------------------------------------------------------------------- From 6079258a7bc603cfbc4f009f817653677f9c8054 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Dec 2020 09:31:30 -0800 Subject: [PATCH 3/3] move note to 1.2.0 --- doc/source/whatsnew/v1.1.5.rst | 1 - doc/source/whatsnew/v1.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index da3008f41bc1b..0e2e510147603 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -25,7 +25,6 @@ Fixed regressions - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) -- Fixed performance regression for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84eb3b3f15780..46644a1187c5b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -539,6 +539,7 @@ Performance improvements - Performance improvement in :meth:`Series.astype` and :meth:`DataFrame.astype` for :class:`Categorical` (:issue:`8628`) - Performance improvement in :meth:`DataFrame.groupby` for ``float`` ``dtype`` (:issue:`28303`), changes of the underlying hash-function can lead to changes in float based indexes sort ordering for ties (e.g. :meth:`Index.value_counts`) - Performance improvement in :meth:`pd.isin` for inputs with more than 1e6 elements (:issue:`36611`) +- Performance improvement for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) .. ---------------------------------------------------------------------------