From 272ccee2028f2dfb5aeb9050b1e88157f223edbe Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 2 Mar 2022 10:19:28 -0800
Subject: [PATCH 1/4] PERF: avoid copies in frame[col].fillna(val, inplace=True) GH#46149

---
 pandas/core/frame.py                      |  6 ++++++
 pandas/core/generic.py                    |  2 +-
 pandas/tests/frame/methods/test_fillna.py | 13 +++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cf1988808bbb0..a0b680d177181 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3961,6 +3961,12 @@ def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None:
         """
         loc = self._info_axis.get_loc(item)
         arraylike = value._values
+
+        old = self._ixs(loc, axis=1)
+        if old._values is value._values and inplace:
+            # GH#46149 avoid making unnecessary copies/block-splitting
+            return
+
         self._mgr.iset(loc, arraylike, inplace=inplace)
 
     # ----------------------------------------------------------------------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9a9697b201b43..e412a0da46d1e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4368,7 +4368,7 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
         self._reset_cache()
         self._clear_item_cache()
         self._mgr = result._mgr
-        self._maybe_update_cacher(verify_is_copy=verify_is_copy)
+        self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True)
 
     @final
     def add_prefix(self: NDFrameT, prefix: str) -> NDFrameT:
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 77ae9fb4c7eff..fc8ec77d95a4a 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -17,6 +17,19 @@
 
 
 class TestFillNA:
+    @tm.skip_array_manager_not_yet_implemented
+    def test_fillna_on_column_view(self):
+        # GH#46149 avoid unnecessary copies
+        arr = np.full((40, 50), np.nan)
+        df = DataFrame(arr)
+
+        df[0].fillna(-1, inplace=True)
+        assert (arr[:, 0] == -1).all()
+
+        # i.e. we didn't create a new 49-column block
+        assert len(df._mgr.arrays) == 1
+        assert np.shares_memory(df.values, arr)
+
     def test_fillna_datetime(self, datetime_frame):
         tf = datetime_frame
         tf.loc[tf.index[:5], "A"] = np.nan

From 2128dad1fc84984485f738c1dc40dad4411f7db5 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 2 Mar 2022 16:26:43 -0800
Subject: [PATCH 2/4] typo tm->td

---
 pandas/tests/frame/methods/test_fillna.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index fc8ec77d95a4a..5008e64dd0e99 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     Categorical,
     DataFrame,
@@ -17,7 +19,7 @@
 
 
 class TestFillNA:
-    @tm.skip_array_manager_not_yet_implemented
+    @td.skip_array_manager_not_yet_implemented
     def test_fillna_on_column_view(self):
         # GH#46149 avoid unnecessary copies
         arr = np.full((40, 50), np.nan)

From c46264a4485b95510f9f44baef5ab00295a2d42d Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 3 Mar 2022 11:35:33 -0800
Subject: [PATCH 3/4] Whatsnew

---
 doc/source/whatsnew/v1.4.2.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst
index 43b911cd24e1d..51ab199709127 100644
--- a/doc/source/whatsnew/v1.4.2.rst
+++ b/doc/source/whatsnew/v1.4.2.rst
@@ -15,6 +15,7 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`)
+- Fixed performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
 -
 
 .. ---------------------------------------------------------------------------

From 5f1251feb2c370b05d8a81a3e3def8976f79bd07 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 3 Mar 2022 11:36:14 -0800
Subject: [PATCH 4/4] typo fixup

---
 doc/source/whatsnew/v1.4.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst
index 51ab199709127..9f4d248ca7042 100644
--- a/doc/source/whatsnew/v1.4.2.rst
+++ b/doc/source/whatsnew/v1.4.2.rst
@@ -15,7 +15,7 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`)
-- Fixed performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
+- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
 -
 
 .. ---------------------------------------------------------------------------