From 789e5f71961d5d515664ab1e66a5077b738b62c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 11 Nov 2020 19:47:49 -0800 Subject: [PATCH 1/3] REF: simplify Block.replace --- pandas/core/internals/blocks.py | 33 ++++++--------------- pandas/tests/frame/methods/test_replace.py | 16 +++++----- pandas/tests/series/methods/test_replace.py | 8 +++-- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ed77a210b6913..e8857c9054507 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -778,36 +778,21 @@ def replace( to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype) mask = missing.mask_missing(values, to_replace) + if not mask.any(): + # Note: we get here with test_replace_extension_other incorreclty + # bc _can_hold_element is incorrect. + return [self] if inplace else [self.copy()] - try: - blocks = self.putmask(mask, value, inplace=inplace) - # Note: it is _not_ the case that self._can_hold_element(value) - # is always true at this point. In particular, that can fail - # for: - # "2u" with bool-dtype, float-dtype - # 0.5 with int64-dtype - # np.nan with int64-dtype - except (TypeError, ValueError): - # GH 22083, TypeError or ValueError occurred within error handling - # causes infinite loop. Cast and retry only if not objectblock. - if is_object_dtype(self): - raise - - if not self.is_extension: - # TODO: https://github.com/pandas-dev/pandas/issues/32586 - # Need an ExtensionArray._can_hold_element to indicate whether - # a scalar value can be placed in the array. - assert not self._can_hold_element(value), value - - # try again with a compatible block - block = self.astype(object) - return block.replace( + if not self._can_hold_element(value): + blk = self.astype(object) + return blk.replace( to_replace=original_to_replace, value=value, - inplace=inplace, + inplace=True, regex=regex, ) + blocks = self.putmask(mask, value, inplace=inplace) blocks = extend_blocks( [b.convert(numeric=False, copy=not inplace) for b in blocks] ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index baa310ddd6f09..bd8f6280ae529 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1517,18 +1517,18 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) - @pytest.mark.xfail( - reason="replace() changes dtype from period to object, see GH34871", strict=True - ) - def test_replace_period_ignore_float(self): + def test_replace_period_ignore_float(self, frame_or_series): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df with a Period column the old, faulty behavior is to raise TypeError. """ - df = DataFrame({"Per": [pd.Period("2020-01")] * 3}) - result = df.replace(1.0, 0.0) - expected = DataFrame({"Per": [pd.Period("2020-01")] * 3}) - tm.assert_frame_equal(expected, result) + obj = DataFrame({"Per": [pd.Period("2020-01")] * 3}) + if frame_or_series is not DataFrame: + obj = obj["Per"] + + expected = obj.copy() + result = obj.replace(1.0, 0.0) + tm.assert_equal(expected, result) def test_replace_value_category_type(self): """ diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 79d6fc22aba97..e009a01c17005 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -437,10 +437,12 @@ def test_replace_only_one_dictlike_arg(self): with pytest.raises(ValueError, match=msg): ser.replace(to_replace, value) - def test_replace_extension_other(self): + def test_replace_extension_other(self, frame_or_series): # https://github.com/pandas-dev/pandas/issues/34530 - ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) - ser.replace("", "") # no exception + obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) + result = obj.replace("", "") # no exception + # should not have changed dtype + tm.assert_equal(obj, result) def test_replace_with_compiled_regex(self): # https://github.com/pandas-dev/pandas/issues/35680 From 90404f0600ee79abcc7b1e30c6a3474b03d9c584 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 11 Nov 2020 19:49:08 -0800 Subject: [PATCH 2/3] typo fixup --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e8857c9054507..9799f2e043f45 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -779,7 +779,7 @@ def replace( mask = missing.mask_missing(values, to_replace) if not mask.any(): - # Note: we get here with test_replace_extension_other incorreclty + # Note: we get here with test_replace_extension_other incorrectly # bc _can_hold_element is incorrect. return [self] if inplace else [self.copy()] From 1829faf0329ba5560852f5cadb8c862c61a121ce Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 12 Nov 2020 21:03:05 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0d3912847dca0..e9b6d53a095d3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -580,6 +580,7 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly casting from ``PeriodDtype`` to object dtype (:issue:`34871`) - Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`) - Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)