Skip to content

REF: implement LossySetitemError #45672

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from pandas.errors import IntCastingNaNError
from pandas.util._decorators import Appender

from pandas.core.dtypes.cast import LossySetitemError
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
Expand Down Expand Up @@ -1081,7 +1082,7 @@ def _validate_listlike(self, value):

try:
self.left._validate_fill_value(value_left)
except (ValueError, TypeError) as err:
except (LossySetitemError, TypeError) as err:
msg = (
"'value' should be a compatible interval type, "
f"got {type(value)} instead."
Expand Down
52 changes: 31 additions & 21 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1924,6 +1924,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
arr._validate_setitem_value(element)
return True
except (ValueError, TypeError):
# TODO(2.0): stop catching ValueError for tzaware, see
# _catch_deprecated_value_error
return False

# This is technically incorrect, but maintains the behavior of
Expand All @@ -1933,7 +1935,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
try:
np_can_hold_element(dtype, element)
return True
except (TypeError, ValueError):
except (TypeError, LossySetitemError):
return False


Expand Down Expand Up @@ -1963,7 +1965,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
if isinstance(element, range):
if _dtype_can_hold_range(element, dtype):
return element
raise ValueError
raise LossySetitemError

elif is_integer(element) or (is_float(element) and element.is_integer()):
# e.g. test_setitem_series_int8 if we have a python int 1
Expand All @@ -1972,7 +1974,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
info = np.iinfo(dtype)
if info.min <= element <= info.max:
return dtype.type(element)
raise ValueError
raise LossySetitemError

if tipo is not None:
if tipo.kind not in ["i", "u"]:
Expand All @@ -1986,10 +1988,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
# np.putmask, whereas the raw values cannot.
# see TestSetitemFloatNDarrayIntoIntegerSeries
return casted
raise ValueError
raise LossySetitemError

# Anything other than integer we cannot hold
raise ValueError
raise LossySetitemError
elif (
dtype.kind == "u"
and isinstance(element, np.ndarray)
Expand All @@ -2001,31 +2003,31 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
# TODO: faster to check (element >=0).all()? potential
# itemsize issues there?
return casted
raise ValueError
raise LossySetitemError
elif dtype.itemsize < tipo.itemsize:
raise ValueError
raise LossySetitemError
elif not isinstance(tipo, np.dtype):
# i.e. nullable IntegerDtype; we can put this into an ndarray
# losslessly iff it has no NAs
if element._hasna:
raise ValueError
raise LossySetitemError
return element

return element

raise ValueError
raise LossySetitemError

elif dtype.kind == "f":
if tipo is not None:
# TODO: itemsize check?
if tipo.kind not in ["f", "i", "u"]:
# Anything other than float/integer we cannot hold
raise ValueError
raise LossySetitemError
elif not isinstance(tipo, np.dtype):
# i.e. nullable IntegerDtype or FloatingDtype;
# we can put this into an ndarray losslessly iff it has no NAs
if element._hasna:
raise ValueError
raise LossySetitemError
return element
elif tipo.itemsize > dtype.itemsize:
if isinstance(element, np.ndarray):
Expand All @@ -2034,13 +2036,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
# TODO(np>=1.20): we can just use np.array_equal with equal_nan
if array_equivalent(casted, element):
return casted
raise ValueError
raise LossySetitemError

return element

if lib.is_integer(element) or lib.is_float(element):
return element
raise ValueError
raise LossySetitemError

elif dtype.kind == "c":
if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
Expand All @@ -2052,13 +2054,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
if casted == element:
return casted
# otherwise e.g. overflow see test_32878_complex_itemsize
raise ValueError
raise LossySetitemError

if tipo is not None:
if tipo.kind in ["c", "f", "i", "u"]:
return element
raise ValueError
raise ValueError
raise LossySetitemError
raise LossySetitemError

elif dtype.kind == "b":
if tipo is not None:
Expand All @@ -2067,23 +2069,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
# i.e. we have a BooleanArray
if element._hasna:
# i.e. there are pd.NA elements
raise ValueError
raise LossySetitemError
return element
raise ValueError
raise LossySetitemError
if lib.is_bool(element):
return element
raise ValueError
raise LossySetitemError

elif dtype.kind == "S":
# TODO: test tests.frame.methods.test_replace tests get here,
# need more targeted tests. xref phofl has a PR about this
if tipo is not None:
if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
return element
raise ValueError
raise LossySetitemError
if isinstance(element, bytes) and len(element) <= dtype.itemsize:
return element
raise ValueError
raise LossySetitemError

raise NotImplementedError(dtype)

Expand All @@ -2097,3 +2099,11 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
if not len(rng):
return True
return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)


class LossySetitemError(Exception):
    """
    Raised when a ``__setitem__`` on an np.ndarray would not be lossless.

    ``np_can_hold_element`` raises this when the element cannot be stored
    in an array of the given dtype without losing information, e.g. an
    integer outside the range of an integer dtype, or a masked array
    containing NAs.

    Notes
    -----
    This class derives directly from ``Exception``. It is not a subclass
    of ``ValueError`` or ``TypeError``, so handlers for those exceptions
    do not catch it; callers that want to fall back to a casting path
    must name ``LossySetitemError`` explicitly in their ``except`` clause.
    """
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
)

from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
construct_1d_arraylike_from_scalar,
construct_2d_arraylike_from_scalar,
Expand Down Expand Up @@ -3882,11 +3883,11 @@ def _set_value(
series = self._get_item_cache(col)
loc = self.index.get_loc(index)

# setitem_inplace will do validation that may raise TypeError
# or ValueError
# setitem_inplace will do validation that may raise TypeError,
# ValueError, or LossySetitemError
series._mgr.setitem_inplace(loc, value)

except (KeyError, TypeError, ValueError):
except (KeyError, TypeError, ValueError, LossySetitemError):
# set using a non-recursive method & reset the cache
if takeable:
self.iloc[index, col] = value
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@

from pandas.core.dtypes.astype import astype_nansafe
from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
common_dtype_categorical_compat,
ensure_dtype_can_hold_na,
Expand Down Expand Up @@ -5071,12 +5072,13 @@ def _validate_fill_value(self, value):
"""
dtype = self.dtype
if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
# return np_can_hold_element(dtype, value)
try:
return np_can_hold_element(dtype, value)
except ValueError as err:
except LossySetitemError as err:
# re-raise as TypeError for consistency
raise TypeError from err
if not can_hold_element(self._values, value):
elif not can_hold_element(self._values, value):
raise TypeError
return value

Expand Down Expand Up @@ -5294,7 +5296,7 @@ def putmask(self, mask, value) -> Index:
value = self._na_value
try:
converted = self._validate_fill_value(value)
except (ValueError, TypeError) as err:
except (LossySetitemError, ValueError, TypeError) as err:
if is_object_dtype(self): # pragma: no cover
raise err

Expand Down Expand Up @@ -6719,7 +6721,7 @@ def insert(self, loc: int, item) -> Index:
return type(self)._simple_new(res_values, name=self.name)
else:
item = self._validate_fill_value(item)
except (TypeError, ValueError):
except (TypeError, ValueError, LossySetitemError):
# e.g. trying to insert an integer into a DatetimeIndex
# We cannot keep the same dtype, so cast to the (often object)
# minimal shared dtype before doing the insert.
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from pandas.core.dtypes.astype import astype_array_safe
from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
find_result_type,
maybe_downcast_to_dtype,
Expand Down Expand Up @@ -1191,7 +1192,7 @@ def where(self, other, cond) -> list[Block]:
# but this gets us back 'casted' which we will re-use below;
# without using 'casted', expressions.where may do unwanted upcasts.
casted = np_can_hold_element(values.dtype, other)
except (ValueError, TypeError):
except (ValueError, TypeError, LossySetitemError):
# we cannot coerce, return a compat dtype
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
)

from pandas.core.dtypes.cast import (
LossySetitemError,
convert_dtypes,
maybe_box_native,
maybe_cast_pointwise_result,
Expand Down Expand Up @@ -1102,7 +1103,7 @@ def __setitem__(self, key, value) -> None:
# GH#12862 adding a new key to the Series
self.loc[key] = value

except (TypeError, ValueError):
except (TypeError, ValueError, LossySetitemError):
# The key was OK, but we cannot set the value losslessly
indexer = self.index.get_loc(key)
self._set_values(indexer, value)
Expand Down