From 19fe4dadac89ee25697ac87bc2e436c4ad7c661a Mon Sep 17 00:00:00 2001
From: Amanda Bizzinotto
Date: Sun, 2 Jul 2023 06:00:52 -0300
Subject: [PATCH 1/3] Preserve dtype of dataframe columns if no replacement

---
 pandas/core/array_algos/replace.py | 44 ++++++++++++++++++++++++++++++
 pandas/core/generic.py             | 13 ++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 85d1f7ccf2e88..aa695eeb9353d 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -23,6 +23,7 @@
 if TYPE_CHECKING:
     from pandas._typing import (
         ArrayLike,
+        NDFrame,
         Scalar,
         npt,
     )
@@ -150,3 +151,46 @@ def re_replacer(s):
         values[:] = f(values)
     else:
         values[mask] = f(values[mask])
+
+
+def keep_original_dtypes(result: NDFrame, original: NDFrame) -> NDFrame:
+    """
+    Restore the original column dtypes where no replacement took place.
+
+    A column of ``result`` is cast back to the dtype it has in ``original``
+    only if every element is unchanged in both value and Python type.
+    Columns in which something was actually replaced are left as they are.
+
+    Parameters
+    ----------
+    result : NDFrame
+        Object produced by the replacement. Modified in place.
+    original : NDFrame
+        Object as it was before the replacement, with the same index and
+        columns as ``result``.
+
+    Returns
+    -------
+    NDFrame
+        ``result``. A Series is returned untouched.
+    """
+    # Perform operation only if input is a dataframe, not a series
+    if not hasattr(original, "columns"):
+        return result
+
+    # Go by position so that duplicate column labels are handled
+    for loc in range(len(original.columns)):
+        res_col = result.iloc[:, loc]
+        orig_col = original.iloc[:, loc]
+        if res_col.dtype == orig_col.dtype:
+            # Nothing to restore; also skips the O(n) comparison below
+            continue
+
+        # Compare as objects, not strings: astype(str) cannot tell 1 from "1"
+        # (or None from "None") and would silently undo such a replacement
+        res_obj = res_col.astype(object)
+        orig_obj = orig_col.astype(object)
+        same_values = res_obj.equals(orig_obj)
+        if same_values and res_obj.map(type).equals(orig_obj.map(type)):
+            result.isetitem(loc, res_col.astype(orig_col.dtype))
+    return result
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b806ddbaa89ba..798525d59de26 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -149,7 +149,10 @@
     nanops,
     sample,
 )
-from pandas.core.array_algos.replace import should_use_regex
+from pandas.core.array_algos.replace import (
+    keep_original_dtypes,
+    should_use_regex,
+)
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import PandasObject
 from pandas.core.construction import extract_array
@@ -7594,6 +7597,12 @@ def replace(
             )
 
         inplace = validate_bool_kwarg(inplace, "inplace")
+        if inplace:
+            # When replace is called on a copy of a dataframe with inplace set to True,
+            # self gets overridden when new_data is generated. This allows for a copy
+            # of the original dataframe to be retained, so if there are no replacements
+            # the original data types can be kept and returned
+            original_df = self.copy()
         if not is_bool(regex) and to_replace is not None:
             raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
 
@@ -7763,8 +7772,10 @@ def replace(
 
         result = self._constructor_from_mgr(new_data, axes=new_data.axes)
         if inplace:
+            result = keep_original_dtypes(result, original_df)
             return self._update_inplace(result)
         else:
+            result = keep_original_dtypes(result, self)
             return result.__finalize__(self, method="replace")
 
     @final

From 6c69512738acaf54aab68ed81cefef134af1c464 Mon Sep 17 00:00:00 2001
From: Amanda Bizzinotto
Date: Sun, 2 Jul 2023 06:10:16 -0300
Subject: [PATCH 2/3] Add test case

---
 pandas/tests/frame/methods/test_replace.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 9256df72cdf7b..ec4aa957bf6bd 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1515,6 +1515,12 @@ def test_replace_with_nil_na(self):
         result = ser.replace("nil", "anything else")
         tm.assert_frame_equal(expected, result)
 
+    def test_replace_no_replacements_dtypes(self):
+        # GH 53539
+        df = DataFrame({"a": [1, 2], "b": [3, 4]}, dtype="object")
+        res = df.replace({5: 0})
+        tm.assert_frame_equal(df, res)
+
 
 class TestDataFrameReplaceRegex:
     @pytest.mark.parametrize(

From 691e3e157b0ec179dbff7fffb24879a2f227b1a8 Mon Sep 17 00:00:00 2001
From: Amanda Bizzinotto
Date: Sun, 2 Jul 2023 06:10:22 -0300
Subject: [PATCH 3/3] Add note to whatsnew

---
 doc/source/whatsnew/v2.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index ebbdbcb0f61f5..ee5db92796169 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -546,6 +546,7 @@ Other
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
 - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
 - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
+- Bug in :meth:`DataFrame.replace` changing column dtypes even when no replacements are made (:issue:`53539`)
 - Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`)
 - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`)
 - Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`)