From 1b29c44ab3a3c7a055396fb6aa089445464fd669 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Jan 2022 18:19:55 -0800 Subject: [PATCH] API/DEPR: reintroduce errors kwd in where, mask, deprecate inconsistent behaviors --- doc/source/whatsnew/v1.4.0.rst | 1 - pandas/core/generic.py | 9 +- pandas/core/internals/array_manager.py | 6 +- pandas/core/internals/blocks.py | 128 +++++++++++++++--- pandas/core/internals/managers.py | 6 +- .../tests/arrays/categorical/test_indexing.py | 12 +- pandas/tests/frame/indexing/test_where.py | 23 +++- pandas/tests/series/methods/test_fillna.py | 16 +-- 8 files changed, 153 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index a4a875513c587..b503e686ac45d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -607,7 +607,6 @@ Other Deprecations - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`) - Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`) - Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where` :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`,:issue:`44940`) -- Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and :meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`) - Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`, in a future version the argument will be removed (:issue:`43396`) - Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`) - Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b92272d72b399..2fe379ced36b0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9010,14 +9010,6 @@ def _where( """ inplace = validate_bool_kwarg(inplace, "inplace") - if errors is not lib.no_default: - warnings.warn( - f"The 'errors' keyword in {type(self).__name__}.where and mask is " - "deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if axis is not None: axis = self._get_axis_number(axis) @@ -9135,6 +9127,7 @@ def _where( other=other, cond=cond, align=align, + errors=errors, ) result = self._constructor(new_data) return result.__finalize__(self) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index ec3a9e8b493e3..b338a61a8bbb1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -327,7 +327,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T return type(self)(result_arrays, self._axes) - def where(self: T, other, cond, align: bool) -> T: + def where(self: T, other, cond, align: bool, errors) -> T: if align: align_keys = ["other", "cond"] else: @@ -339,13 +339,14 @@ def where(self: T, other, cond, align: bool) -> T: align_keys=align_keys, other=other, cond=cond, + errors=errors, ) # TODO what is this used for? # def setitem(self, indexer, value) -> ArrayManager: # return self.apply_with_block("setitem", indexer=indexer, value=value) - def putmask(self, mask, new, align: bool = True): + def putmask(self, mask, new, align: bool = True, errors=lib.no_default): if align: align_keys = ["new", "mask"] else: @@ -357,6 +358,7 @@ def putmask(self, mask, new, align: bool = True): align_keys=align_keys, mask=mask, new=new, + errors=errors, ) def diff(self: T, n: int, axis: int) -> T: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9f242226739ec..48e87702c8d90 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -932,7 +932,7 @@ def setitem(self, indexer, value): return self - def putmask(self, mask, new) -> list[Block]: + def putmask(self, mask, new, errors) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -960,7 +960,7 @@ def putmask(self, mask, new) -> list[Block]: if not self.is_object and is_valid_na_for_dtype(new, self.dtype): new = self.fill_value - if self._can_hold_element(new): + if self._can_hold_element(new) or errors == "raise": putmask_without_repeat(values.T, mask, new) return [self] @@ -968,7 +968,7 @@ def putmask(self, mask, new) -> list[Block]: # using putmask with object dtype will incorrectly cast to object # Having excluded self._can_hold_element, we know we cannot operate # in-place, so we are safe using `where` - return self.where(new, ~mask) + return self.where(new, ~mask, errors=errors) elif noop: return [self] @@ -978,7 +978,8 @@ def putmask(self, mask, new) -> list[Block]: if not is_list_like(new): # putmask_smart can't save us the need to cast - return self.coerce_to_target_dtype(new).putmask(mask, new) + blk = self.coerce_to_target_dtype(new) + return blk.putmask(mask, new, errors=errors) # This differs from # `self.coerce_to_target_dtype(new).putmask(mask, new)` @@ -999,7 +1000,7 @@ def putmask(self, mask, new) -> list[Block]: n = new[:, i : i + 1] submask = orig_mask[:, i : i + 1] - rbs = nb.putmask(submask, n) + rbs = nb.putmask(submask, n, errors=errors) res_blocks.extend(rbs) return res_blocks @@ -1141,7 +1142,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo return [self.make_block(new_values)] - def where(self, other, cond) -> list[Block]: + def where(self, other, cond, errors) -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1178,10 +1179,10 @@ def where(self, other, cond) -> list[Block]: # GH-39595: Always return a copy result = values.copy() - elif not self._can_hold_element(other): + elif not self._can_hold_element(other) and errors != "raise": # we cannot coerce, return a compat dtype block = self.coerce_to_target_dtype(other) - blocks = block.where(orig_other, cond) + blocks = block.where(orig_other, cond, errors=errors) return self._maybe_downcast(blocks, "infer") else: @@ -1330,7 +1331,7 @@ class EABackedBlock(Block): values: ExtensionArray - def where(self, other, cond) -> list[Block]: + def where(self, other, cond, errors) -> list[Block]: arr = self.values.T cond = extract_bool_array(cond) @@ -1350,6 +1351,9 @@ def where(self, other, cond) -> list[Block]: try: res_values = arr._where(cond, other).T except (ValueError, TypeError) as err: + if errors == "raise": + raise + if isinstance(err, ValueError): # TODO(2.0): once DTA._validate_setitem_value deprecation # is enforced, stop catching ValueError here altogether @@ -1362,8 +1366,24 @@ def where(self, other, cond) -> list[Block]: if blk.dtype == _dtype_obj: # For now at least only support casting e.g. # Interval[int64]->Interval[float64] - raise - return blk.where(other, cond) + if errors is lib.no_default: + # TODO: no tests get here 2022-01-04 + warnings.warn( + "The default behavior of 'where' when setting an " + f"incompatible value into an array with dtype={self.dtype} " + "is deprecated. In a future version, the array and " + "value will be coerced to a common dtype (matching " + "the behavior of other dtypes). To retain the old " + "behavior, pass `errors='raise'`. To get the future " + "behavior, pass `errors='coerce'`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + # "raise" case handled above + assert errors == "coerce" + return blk.where(other, cond, errors=errors) elif isinstance(self, NDArrayBackedExtensionBlock): # NB: not (yet) the same as @@ -1372,18 +1392,51 @@ def where(self, other, cond) -> list[Block]: # TODO: don't special-case # Note: this is the main place where the fallback logic # is different from EABackedBlock.putmask. - raise + if errors is lib.no_default: + warnings.warn( + "The default behavior of 'where' when setting an " + f"incompatible value into an array with dtype={self.dtype} " + "is deprecated. In a future version, the array and " + "value will be coerced to a common dtype (matching " + "the behavior of other dtypes). To retain the old " + "behavior, pass `errors='raise'`. To get the future " + "behavior, pass `errors='coerce'`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + # "raise" case handled above + assert errors == "coerce" blk = self.coerce_to_target_dtype(other) - nbs = blk.where(other, cond) + nbs = blk.where(other, cond, errors=errors) return self._maybe_downcast(nbs, "infer") else: - raise + if errors is lib.no_default: + warnings.warn( + "The default behavior of 'where' when setting an " + f"incompatible value into an array with dtype={self.dtype} " + "is deprecated. In a future version, the array and value " + "will be coerced to a common dtype (matching the behavior " + "of other dtypes). To retain the old behavior, pass " + "`errors='raise'`. To get the future behavior, pass " + "`errors='coerce'`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + # "raise" case handled above + assert errors == "coerce" + blk = self.coerce_to_target_dtype(other) + nbs = blk.where(other, cond, errors=errors) + return self._maybe_downcast(nbs, "infer") nb = self.make_block_same_class(res_values) return [nb] - def putmask(self, mask, new) -> list[Block]: + def putmask(self, mask, new, errors) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -1397,6 +1450,9 @@ def putmask(self, mask, new) -> list[Block]: # Caller is responsible for ensuring matching lengths values._putmask(mask, new) except (TypeError, ValueError) as err: + if errors == "raise": + raise + if isinstance(err, ValueError) and "Timezones don't match" not in str(err): # TODO(2.0): remove catching ValueError at all since # DTA raising here is deprecated @@ -1409,17 +1465,51 @@ def putmask(self, mask, new) -> list[Block]: if blk.dtype == _dtype_obj: # For now at least, only support casting e.g. # Interval[int64]->Interval[float64], - raise - return blk.putmask(mask, new) + if errors is lib.no_default: + # TODO: no tests get here 2022-01-04 + warnings.warn( + "The default behavior of 'putmask' when setting an " + f"incompatible value into an array with dtype={self.dtype} " + "is deprecated. In a future version, the array and " + "value will be coerced to a common dtype (matching " + "the behavior of other dtypes). To retain the old " + "behavior, pass `errors='raise'`. To get the future " + "behavior, pass `errors='coerce'`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + # "raise" case handled above + assert errors == "coerce" + return blk.putmask(mask, new, errors=errors) elif isinstance(self, NDArrayBackedExtensionBlock): # NB: not (yet) the same as # isinstance(values, NDArrayBackedExtensionArray) blk = self.coerce_to_target_dtype(new) - return blk.putmask(mask, new) + return blk.putmask(mask, new, errors=errors) else: - raise + if errors is lib.no_default: + warnings.warn( + "The default behavior of 'putmask' when setting an " + f"incompatible value into an array with dtype={self.dtype} " + "is deprecated. In a future version, the array and value " + "will be coerced to a common dtype (matching the behavior " + "of other dtypes). To retain the old behavior, pass " + "`errors='raise'`. To get the future behavior, pass " + "`errors='coerce'`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + # "raise" case handled above + assert errors == "coerce" + # TODO: no tests get here 2022-01-04 + blk = self.coerce_to_target_dtype(new) + return blk.putmask(mask, new, errors=errors) return [self] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3e0b62da64f42..7e3846c638907 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -314,7 +314,7 @@ def apply( out = type(self).from_blocks(result_blocks, self.axes) return out - def where(self: T, other, cond, align: bool) -> T: + def where(self: T, other, cond, align: bool, errors) -> T: if align: align_keys = ["other", "cond"] else: @@ -326,6 +326,7 @@ def where(self: T, other, cond, align: bool) -> T: align_keys=align_keys, other=other, cond=cond, + errors=errors, ) def setitem(self: T, indexer, value) -> T: @@ -336,7 +337,7 @@ def setitem(self: T, indexer, value) -> T: """ return self.apply("setitem", indexer=indexer, value=value) - def putmask(self, mask, new, align: bool = True): + def putmask(self, mask, new, align: bool = True, errors=lib.no_default): if align: align_keys = ["new", "mask"] @@ -349,6 +350,7 @@ def putmask(self, mask, new, align: bool = True): align_keys=align_keys, mask=mask, new=new, + errors=errors, ) def diff(self: T, n: int, axis: int) -> T: diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 617d1861fa65a..a4a9cedd489ce 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -260,8 +260,12 @@ def test_where_other_categorical(self): def test_where_new_category_raises(self): ser = Series(Categorical(["a", "b", "c"])) msg = "Cannot setitem on a Categorical with a new category" + warn_msg = "The default behavior of 'where' when" with pytest.raises(TypeError, match=msg): - ser.where([True, False, True], "d") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + ser.where([True, False, True], "d") + with pytest.raises(TypeError, match=msg): + ser.where([True, False, True], "d", errors="raise") def test_where_ordered_differs_rasies(self): ser = Series( @@ -270,8 +274,12 @@ def test_where_ordered_differs_rasies(self): other = Categorical( ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True ) + warn_msg = "The default behavior of 'where' when" + with pytest.raises(TypeError, match="without identical categories"): + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + ser.where([True, False, True], other) with pytest.raises(TypeError, match="without identical categories"): - ser.where([True, False, True], other) + ser.where([True, False, True], other, errors="raise") class TestContains: diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 197c3ac9bd225..93ab40e28a9a9 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -886,11 +886,18 @@ def test_where_period_invalid_na(frame_or_series, as_cat, request): else: msg = "value should be a 'Period'" + warn_msg = "The default behavior of '(where|putmask)' when" with pytest.raises(TypeError, match=msg): - obj.where(mask, tdnat) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + obj.where(mask, tdnat) + with pytest.raises(TypeError, match=msg): + obj.where(mask, tdnat, errors="raise") with pytest.raises(TypeError, match=msg): - obj.mask(mask, tdnat) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + obj.mask(mask, tdnat) + with pytest.raises(TypeError, match=msg): + obj.mask(mask, tdnat, errors="raise") def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): @@ -911,13 +918,21 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): ] ) + warn_msg = "The default behavior of '(putmask|where)' when setting" + for null in tm.NP_NAT_OBJECTS + [pd.NaT]: # NaT is an NA value that we should *not* cast to pd.NA dtype with pytest.raises(TypeError, match=msg): - obj.where(mask, null) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + obj.where(mask, null) + with pytest.raises(TypeError, match=msg): + obj.where(mask, null, errors="raise") with pytest.raises(TypeError, match=msg): - obj.mask(mask, null) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + obj.mask(mask, null) + with pytest.raises(TypeError, match=msg): + obj.mask(mask, null, errors="raise") @given(data=OPTIONAL_ONE_OF_ALL) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 19f61a0a2d6fc..55ba9fbaf72f4 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -151,18 +151,14 @@ def test_fillna_consistency(self): ) tm.assert_series_equal(result, expected) - msg = "The 'errors' keyword in " - with tm.assert_produces_warning(FutureWarning, match=msg): - # where (we ignore the errors=) - result = ser.where( - [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" - ) + result = ser.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="coerce" + ) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.where( - [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" - ) + result = ser.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="coerce" + ) tm.assert_series_equal(result, expected) # with a non-datetime