From c0797f9019ddc5639c94d8d988b4ee8a7ce5e002 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Jan 2022 18:36:58 -0800 Subject: [PATCH 1/2] REF: implement LossySetitemError --- pandas/core/arrays/interval.py | 3 +- pandas/core/dtypes/cast.py | 50 ++++++++++++++++++++-------------- pandas/core/frame.py | 5 ++-- pandas/core/indexes/base.py | 10 ++++--- pandas/core/series.py | 3 +- 5 files changed, 43 insertions(+), 28 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 33732bcaca733..d23910c37b52b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,6 +39,7 @@ from pandas.errors import IntCastingNaNError from pandas.util._decorators import Appender +from pandas.core.dtypes.cast import LossySetitemError from pandas.core.dtypes.common import ( is_categorical_dtype, is_dtype_equal, @@ -1081,7 +1082,7 @@ def _validate_listlike(self, value): try: self.left._validate_fill_value(value_left) - except (ValueError, TypeError) as err: + except (LossySetitemError, TypeError) as err: msg = ( "'value' should be a compatible interval type, " f"got {type(value)} instead." 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fe5b464a5a18d..7d4747c5033c2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1923,6 +1923,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: arr._validate_setitem_value(element) return True except (ValueError, TypeError): + # TODO(2.0): stop catching ValueError for tzaware, see + # _catch_deprecated_value_error return False # This is technically incorrect, but maintains the behavior of @@ -1932,7 +1934,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: try: np_can_hold_element(dtype, element) return True - except (TypeError, ValueError): + except (TypeError, LossySetitemError): return False @@ -1962,7 +1964,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if isinstance(element, range): if _dtype_can_hold_range(element, dtype): return element - raise ValueError + raise LossySetitemError elif is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1971,7 +1973,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: info = np.iinfo(dtype) if info.min <= element <= info.max: return element - raise ValueError + raise LossySetitemError if tipo is not None: if tipo.kind not in ["i", "u"]: @@ -1985,10 +1987,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # np.putmask, whereas the raw values cannot. # see TestSetitemFloatNDarrayIntoIntegerSeries return casted - raise ValueError + raise LossySetitemError # Anything other than integer we cannot hold - raise ValueError + raise LossySetitemError elif ( dtype.kind == "u" and isinstance(element, np.ndarray) @@ -2000,37 +2002,37 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # TODO: faster to check (element >=0).all()? potential # itemsize issues there? 
return casted - raise ValueError + raise LossySetitemError elif dtype.itemsize < tipo.itemsize: - raise ValueError + raise LossySetitemError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs if element._hasna: - raise ValueError + raise LossySetitemError return element return element - raise ValueError + raise LossySetitemError elif dtype.kind == "f": if tipo is not None: # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold - raise ValueError + raise LossySetitemError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs if element._hasna: - raise ValueError + raise LossySetitemError return element return element if lib.is_integer(element) or lib.is_float(element): return element - raise ValueError + raise LossySetitemError elif dtype.kind == "c": if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): @@ -2042,13 +2044,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if casted == element: return casted # otherwise e.g. overflow see test_32878_complex_itemsize - raise ValueError + raise LossySetitemError if tipo is not None: if tipo.kind in ["c", "f", "i", "u"]: return element - raise ValueError - raise ValueError + raise LossySetitemError + raise LossySetitemError elif dtype.kind == "b": if tipo is not None: @@ -2057,12 +2059,12 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # i.e. we have a BooleanArray if element._hasna: # i.e. 
there are pd.NA elements - raise ValueError + raise LossySetitemError return element - raise ValueError + raise LossySetitemError if lib.is_bool(element): return element - raise ValueError + raise LossySetitemError elif dtype.kind == "S": # TODO: test tests.frame.methods.test_replace tests get here, @@ -2070,10 +2072,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if tipo is not None: if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: return element - raise ValueError + raise LossySetitemError if isinstance(element, bytes) and len(element) <= dtype.itemsize: return element - raise ValueError + raise LossySetitemError raise NotImplementedError(dtype) @@ -2087,3 +2089,11 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: if not len(rng): return True return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) + + +class LossySetitemError(Exception): + """ + Raised when trying to do a __setitem__ on an np.ndarray that is not lossless. + """ + + pass diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d76af1ce42546..dab8840e39ad3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,6 +93,7 @@ ) from pandas.core.dtypes.cast import ( + LossySetitemError, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, find_common_type, @@ -3885,9 +3886,9 @@ def _set_value( loc = self.index.get_loc(index) # series._set_value will do validation that may raise TypeError - # or ValueError + # or LossySetitemError series._set_value(loc, value, takeable=True) - except (KeyError, TypeError, ValueError): + except (KeyError, TypeError, LossySetitemError): # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 99396c6986043..ee2f81ef78e1b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -68,6 +68,7 @@ from pandas.core.dtypes.astype import astype_nansafe 
from pandas.core.dtypes.cast import ( + LossySetitemError, can_hold_element, common_dtype_categorical_compat, ensure_dtype_can_hold_na, @@ -5064,12 +5065,13 @@ def _validate_fill_value(self, value): """ dtype = self.dtype if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]: + # raises LossySetitemError if value cannot be held losslessly try: return np_can_hold_element(dtype, value) - except ValueError as err: + except LossySetitemError as err: # re-raise as TypeError for consistency raise TypeError from err - if not can_hold_element(self._values, value): + elif not can_hold_element(self._values, value): raise TypeError return value @@ -5287,7 +5289,7 @@ def putmask(self, mask, value) -> Index: value = self._na_value try: converted = self._validate_fill_value(value) - except (ValueError, TypeError) as err: + except (LossySetitemError, ValueError, TypeError) as err: if is_object_dtype(self): # pragma: no cover raise err @@ -6712,7 +6714,7 @@ def insert(self, loc: int, item) -> Index: return type(self)._simple_new(res_values, name=self.name) else: item = self._validate_fill_value(item) - except (TypeError, ValueError): + except (TypeError, ValueError, LossySetitemError): # e.g. trying to insert an integer into a DatetimeIndex # We cannot keep the same dtype, so cast to the (often object) # minimal shared dtype before doing the insert. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 596953652d2ff..047db74ea2332 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,6 +62,7 @@ ) from pandas.core.dtypes.cast import ( + LossySetitemError, convert_dtypes, maybe_box_native, maybe_cast_pointwise_result, @@ -1102,7 +1103,7 @@ def __setitem__(self, key, value) -> None: # GH#12862 adding a new key to the Series self.loc[key] = value - except (TypeError, ValueError): + except (TypeError, ValueError, LossySetitemError): # The key was OK, but we cannot set the value losslessly indexer = self.index.get_loc(key) self._set_values(indexer, value) From 10aee4196844bd390858fdc9415c67e93728589a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Jan 2022 07:31:20 -0800 Subject: [PATCH 2/2] update catching in Block.where --- pandas/core/internals/blocks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d5bae63976e63..2e6492b47cb08 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,6 +36,7 @@ from pandas.core.dtypes.astype import astype_array_safe from pandas.core.dtypes.cast import ( + LossySetitemError, can_hold_element, find_result_type, maybe_downcast_to_dtype, @@ -1191,7 +1192,7 @@ def where(self, other, cond) -> list[Block]: # but this gets us back 'casted' which we will re-use below; # without using 'casted', expressions.where may do unwanted upcasts. casted = np_can_hold_element(values.dtype, other) - except (ValueError, TypeError): + except (ValueError, TypeError, LossySetitemError): # we cannot coerce, return a compat dtype block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond)