From ab6871a2da7539adeac8a9cb35e4832c853dbf46 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 27 Mar 2024 19:02:54 -0700 Subject: [PATCH 1/3] DEPR: replace without passing value --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/_mixins.py | 7 --- pandas/core/arrays/base.py | 19 ------- pandas/core/generic.py | 57 +++++---------------- pandas/core/series.py | 35 ------------- pandas/tests/frame/methods/test_replace.py | 7 +-- pandas/tests/series/methods/test_replace.py | 41 +++++---------- 7 files changed, 29 insertions(+), 138 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 26dd6f83ad44a..c4eb9d3ff8079 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -206,6 +206,7 @@ Removal of prior version deprecations/changes - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) +- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a 'value' and with non-dict-like 'to_replace' (:issue:`33302`) - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) - Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 7f4e6f6666382..d242702258ad7 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -296,13 +296,6 @@ def __getitem__( result = self._from_backing_data(result) return result - def _fill_mask_inplace( - self, method: str, limit: int | None, mask: npt.NDArray[np.bool_] - ) -> None: - # (for now) when self.ndim == 2, we assume axis=0 - func = missing.get_fill_func(method, ndim=self.ndim) - func(self._ndarray.T, limit=limit, mask=mask.T) - def _pad_or_backfill( self, *, diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 76615704f2e33..4515afa8a6883 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2111,25 +2111,6 @@ def _where(self, mask: npt.NDArray[np.bool_], value) -> Self: result[~mask] = val return result - # TODO(3.0): this can be removed once GH#33302 deprecation is enforced - def _fill_mask_inplace( - self, method: str, limit: int | None, mask: npt.NDArray[np.bool_] - ) -> None: - """ - Replace values in locations specified by 'mask' using pad or backfill. - - See also - -------- - ExtensionArray.fillna - """ - func = missing.get_fill_func(method) - npvalues = self.astype(object) - # NB: if we don't copy mask here, it may be altered inplace, which - # would mess up the `self[mask] = ...` below. - func(npvalues, limit=limit, mask=mask.copy()) - new_values = self._from_sequence(npvalues, dtype=self.dtype) - self[mask] = new_values[mask] - def _rank( self, *, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 858d2ba82a969..091cda7e9cf0c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7319,17 +7319,8 @@ def replace( inplace: bool = False, regex: bool = False, ) -> Self | None: - if value is lib.no_default and not is_dict_like(to_replace) and regex is False: - # case that goes through _replace_single and defaults to method="pad" - warnings.warn( - # GH#33302 - f"{type(self).__name__}.replace without 'value' and with " - "non-dict-like 'to_replace' is deprecated " - "and will raise in a future version. " - "Explicitly specify the new values instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + if not is_bool(regex) and to_replace is not None: + raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") if not ( is_scalar(to_replace) @@ -7342,6 +7333,15 @@ def replace( f"{type(to_replace).__name__!r}" ) + if value is lib.no_default and not ( + is_dict_like(to_replace) or is_dict_like(regex) + ): + raise ValueError( + # GH#33302 + f"{type(self).__name__}.replace must specify either 'value', " + "a dict-like 'to_replace', or dict-like 'regex'." + ) + inplace = validate_bool_kwarg(inplace, "inplace") if inplace: if not PYPY: @@ -7352,41 +7352,10 @@ def replace( stacklevel=2, ) - if not is_bool(regex) and to_replace is not None: - raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") - if value is lib.no_default: - # GH#36984 if the user explicitly passes value=None we want to - # respect that. We have the corner case where the user explicitly - # passes value=None *and* a method, which we interpret as meaning - # they want the (documented) default behavior. - - # passing a single value that is scalar like - # when value is None (GH5319), for compat - if not is_dict_like(to_replace) and not is_dict_like(regex): - to_replace = [to_replace] - - if isinstance(to_replace, (tuple, list)): - # TODO: Consider copy-on-write for non-replaced columns's here - if isinstance(self, ABCDataFrame): - from pandas import Series - - result = self.apply( - Series._replace_single, - args=(to_replace, inplace), - ) - if inplace: - return None - return result - return self._replace_single(to_replace, inplace) - if not is_dict_like(to_replace): - if not is_dict_like(regex): - raise TypeError( - 'If "to_replace" and "value" are both None ' - 'and "to_replace" is not a list, then ' - "regex must be a mapping" - ) + # In this case we have checked above that + # 1) regex is dict-like and 2) to_replace is None to_replace = regex regex = True diff --git a/pandas/core/series.py b/pandas/core/series.py index b0dc05fce7913..bd131f2212db3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -97,7 +97,6 @@ algorithms, base, common as com, - missing, nanops, ops, roperator, @@ -5112,40 +5111,6 @@ def info( show_counts=show_counts, ) - @overload - def _replace_single(self, to_replace, inplace: Literal[False]) -> Self: ... - - @overload - def _replace_single(self, to_replace, inplace: Literal[True]) -> None: ... - - @overload - def _replace_single(self, to_replace, inplace: bool) -> Self | None: ... - - # TODO(3.0): this can be removed once GH#33302 deprecation is enforced - def _replace_single(self, to_replace, inplace: bool) -> Self | None: - """ - Replaces values in a Series using the fill method specified when no - replacement value is given in the replace method - """ - limit = None - method = "pad" - - result = self if inplace else self.copy() - - values = result._values - mask = missing.mask_missing(values, to_replace) - - if isinstance(values, ExtensionArray): - # dispatch to the EA's _pad_mask_inplace method - values._fill_mask_inplace(method, limit, mask) - else: - fill_f = missing.get_fill_func(method) - fill_f(values, limit=limit, mask=mask) - - if inplace: - return None - return result - def memory_usage(self, index: bool = True, deep: bool = False) -> int: """ Return the memory usage of the Series. diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 3b9c342f35a71..fb7ba2b7af38a 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1264,13 +1264,8 @@ def test_replace_invalid_to_replace(self): r"Expecting 'to_replace' to be either a scalar, array-like, " r"dict or None, got invalid type.*" ) - msg2 = ( - "DataFrame.replace without 'value' and with non-dict-like " - "'to_replace' is deprecated" - ) with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): - df.replace(lambda x: x.strip()) + df.replace(lambda x: x.strip()) @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) @pytest.mark.parametrize("value", [np.nan, pd.NA]) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 09a3469e73462..0a79bcea679a7 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -137,20 +137,15 @@ def test_replace_gh5319(self): # API change from 0.12? # GH 5319 ser = pd.Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() msg = ( - "Series.replace without 'value' and with non-dict-like " - "'to_replace' is deprecated" + "Series.replace must specify either 'value', " + "a dict-like 'to_replace', or dict-like 'regex'" ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.replace([np.nan]) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.replace([np.nan]) - ser = pd.Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.replace(np.nan) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.replace(np.nan) def test_replace_datetime64(self): # GH 5797 @@ -182,19 +177,16 @@ def test_replace_timedelta_td64(self): def test_replace_with_single_list(self): ser = pd.Series([0, 1, 2, 3, 4]) - msg2 = ( - "Series.replace without 'value' and with non-dict-like " - "'to_replace' is deprecated" + msg = ( + "Series.replace must specify either 'value', " + "a dict-like 'to_replace', or dict-like 'regex'" ) - with tm.assert_produces_warning(FutureWarning, match=msg2): - result = ser.replace([1, 2, 3]) - tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) + with pytest.raises(ValueError, match=msg): + ser.replace([1, 2, 3]) s = ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg2): - return_value = s.replace([1, 2, 3], inplace=True) - assert return_value is None - tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) + with pytest.raises(ValueError, match=msg): + s.replace([1, 2, 3], inplace=True) def test_replace_mixed_types(self): ser = pd.Series(np.arange(5), dtype="int64") @@ -483,13 +475,8 @@ def test_replace_invalid_to_replace(self): r"Expecting 'to_replace' to be either a scalar, array-like, " r"dict or None, got invalid type.*" ) - msg2 = ( - "Series.replace without 'value' and with non-dict-like " - "'to_replace' is deprecated" - ) with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): - series.replace(lambda x: x.strip()) + series.replace(lambda x: x.strip()) @pytest.mark.parametrize("frame", [False, True]) def test_replace_nonbool_regex(self, frame): From 670822ea356f6337cece491087ba02dd29c0105d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 27 Mar 2024 20:59:53 -0700 Subject: [PATCH 2/3] update doctest --- pandas/core/shared_docs.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 2d8517693a2f8..38a443b56ee3d 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -608,24 +608,7 @@ 4 None dtype: object - When ``value`` is not explicitly passed and `to_replace` is a scalar, list - or tuple, `replace` uses the method parameter (default 'pad') to do the - replacement. So this is why the 'a' values are being replaced by 10 - in rows 1 and 2 and 'b' in row 4 in this case. - - >>> s.replace('a') - 0 10 - 1 10 - 2 10 - 3 b - 4 b - dtype: object - - .. deprecated:: 2.1.0 - The 'method' parameter and padding behavior are deprecated. - - On the other hand, if ``None`` is explicitly passed for ``value``, it will - be respected: + If ``None`` is explicitly passed for ``value``, it will be respected: >>> s.replace('a', None) 0 10 From 588d345f80d9884b24c66025fa89ad8d2a95c214 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 Apr 2024 11:14:06 -0700 Subject: [PATCH 3/3] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f52532963dbbc..991885e8eb35b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -206,7 +206,7 @@ Removal of prior version deprecations/changes - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) -- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a 'value' and with non-dict-like 'to_replace' (:issue:`33302`) +- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`) - Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`) - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)