diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 33732bcaca733..d23910c37b52b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,6 +39,7 @@ from pandas.errors import IntCastingNaNError from pandas.util._decorators import Appender +from pandas.core.dtypes.cast import LossySetitemError from pandas.core.dtypes.common import ( is_categorical_dtype, is_dtype_equal, @@ -1081,7 +1082,7 @@ def _validate_listlike(self, value): try: self.left._validate_fill_value(value_left) - except (ValueError, TypeError) as err: + except (LossySetitemError, TypeError) as err: msg = ( "'value' should be a compatible interval type, " f"got {type(value)} instead." diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 216dd1e65de3a..1645ee13724b3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1924,6 +1924,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: arr._validate_setitem_value(element) return True except (ValueError, TypeError): + # TODO(2.0): stop catching ValueError for tzaware, see + # _catch_deprecated_value_error return False # This is technically incorrect, but maintains the behavior of @@ -1933,7 +1935,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: try: np_can_hold_element(dtype, element) return True - except (TypeError, ValueError): + except (TypeError, LossySetitemError): return False @@ -1963,7 +1965,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if isinstance(element, range): if _dtype_can_hold_range(element, dtype): return element - raise ValueError + raise LossySetitemError elif is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1972,7 +1974,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: info = np.iinfo(dtype) if info.min <= element <= info.max: return dtype.type(element) - raise ValueError + raise LossySetitemError if tipo is not None: if tipo.kind not in ["i", "u"]: @@ -1986,10 +1988,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # np.putmask, whereas the raw values cannot. # see TestSetitemFloatNDarrayIntoIntegerSeries return casted - raise ValueError + raise LossySetitemError # Anything other than integer we cannot hold - raise ValueError + raise LossySetitemError elif ( dtype.kind == "u" and isinstance(element, np.ndarray) @@ -2001,31 +2003,31 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # TODO: faster to check (element >=0).all()? potential # itemsize issues there? return casted - raise ValueError + raise LossySetitemError elif dtype.itemsize < tipo.itemsize: - raise ValueError + raise LossySetitemError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs if element._hasna: - raise ValueError + raise LossySetitemError return element return element - raise ValueError + raise LossySetitemError elif dtype.kind == "f": if tipo is not None: # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold - raise ValueError + raise LossySetitemError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs if element._hasna: - raise ValueError + raise LossySetitemError return element elif tipo.itemsize > dtype.itemsize: if isinstance(element, np.ndarray): @@ -2034,13 +2036,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # TODO(np>=1.20): we can just use np.array_equal with equal_nan if array_equivalent(casted, element): return casted - raise ValueError + raise LossySetitemError return element if lib.is_integer(element) or lib.is_float(element): return element - raise ValueError + raise LossySetitemError elif dtype.kind == "c": if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): @@ -2052,13 +2054,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if casted == element: return casted # otherwise e.g. overflow see test_32878_complex_itemsize - raise ValueError + raise LossySetitemError if tipo is not None: if tipo.kind in ["c", "f", "i", "u"]: return element - raise ValueError - raise ValueError + raise LossySetitemError + raise LossySetitemError elif dtype.kind == "b": if tipo is not None: @@ -2067,12 +2069,12 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # i.e. we have a BooleanArray if element._hasna: # i.e. there are pd.NA elements - raise ValueError + raise LossySetitemError return element - raise ValueError + raise LossySetitemError if lib.is_bool(element): return element - raise ValueError + raise LossySetitemError elif dtype.kind == "S": # TODO: test tests.frame.methods.test_replace tests get here, @@ -2080,10 +2082,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if tipo is not None: if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: return element - raise ValueError + raise LossySetitemError if isinstance(element, bytes) and len(element) <= dtype.itemsize: return element - raise ValueError + raise LossySetitemError raise NotImplementedError(dtype) @@ -2097,3 +2099,11 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: if not len(rng): return True return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) + + +class LossySetitemError(Exception): + """ + Raised when trying to do a __setitem__ on an np.ndarray that is not lossless. + """ + + pass diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a68a2f40d02f7..5674c118f63d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,6 +93,7 @@ ) from pandas.core.dtypes.cast import ( + LossySetitemError, can_hold_element, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, @@ -3882,11 +3883,11 @@ def _set_value( series = self._get_item_cache(col) loc = self.index.get_loc(index) - # setitem_inplace will do validation that may raise TypeError - # or ValueError + # setitem_inplace will do validation that may raise TypeError, + # ValueError, or LossySetitemError series._mgr.setitem_inplace(loc, value) - except (KeyError, TypeError, ValueError): + except (KeyError, TypeError, ValueError, LossySetitemError): # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 935d61447df7b..c57ee4fb7e79e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -68,6 +68,7 @@ from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.cast import ( + LossySetitemError, can_hold_element, common_dtype_categorical_compat, ensure_dtype_can_hold_na, @@ -5071,12 +5072,13 @@ def _validate_fill_value(self, value): """ dtype = self.dtype if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]: + # return np_can_hold_element(dtype, value) try: return np_can_hold_element(dtype, value) - except ValueError as err: + except LossySetitemError as err: # re-raise as TypeError for consistency raise TypeError from err - if not can_hold_element(self._values, value): + elif not can_hold_element(self._values, value): raise TypeError return value @@ -5294,7 +5296,7 @@ def putmask(self, mask, value) -> Index: value = self._na_value try: converted = self._validate_fill_value(value) - except (ValueError, TypeError) as err: + except (LossySetitemError, ValueError, TypeError) as err: if is_object_dtype(self): # pragma: no cover raise err @@ -6719,7 +6721,7 @@ def insert(self, loc: int, item) -> Index: return type(self)._simple_new(res_values, name=self.name) else: item = self._validate_fill_value(item) - except (TypeError, ValueError): + except (TypeError, ValueError, LossySetitemError): # e.g. trying to insert an integer into a DatetimeIndex # We cannot keep the same dtype, so cast to the (often object) # minimal shared dtype before doing the insert. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d5bae63976e63..2e6492b47cb08 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,6 +36,7 @@ from pandas.core.dtypes.astype import astype_array_safe from pandas.core.dtypes.cast import ( + LossySetitemError, can_hold_element, find_result_type, maybe_downcast_to_dtype, @@ -1191,7 +1192,7 @@ def where(self, other, cond) -> list[Block]: # but this gets us back 'casted' which we will re-use below; # without using 'casted', expressions.where may do unwanted upcasts. casted = np_can_hold_element(values.dtype, other) - except (ValueError, TypeError): + except (ValueError, TypeError, LossySetitemError): # we cannot coerce, return a compat dtype block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond) diff --git a/pandas/core/series.py b/pandas/core/series.py index a4fcc1e0b1b12..e4ba9ef2825e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,6 +62,7 @@ ) from pandas.core.dtypes.cast import ( + LossySetitemError, convert_dtypes, maybe_box_native, maybe_cast_pointwise_result, @@ -1102,7 +1103,7 @@ def __setitem__(self, key, value) -> None: # GH#12862 adding a new key to the Series self.loc[key] = value - except (TypeError, ValueError): + except (TypeError, ValueError, LossySetitemError): # The key was OK, but we cannot set the value losslessly indexer = self.index.get_loc(key) self._set_values(indexer, value)