From f8766e4668e1bab0abcc8f81b6d12b59de9b98aa Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Oct 2020 15:54:45 -0700 Subject: [PATCH 1/4] CLN: de-duplicate datetimelike validators by standardizing exception message --- pandas/core/arrays/datetimelike.py | 57 ++++++++++++------- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/indexes/datetimes/test_insert.py | 5 +- .../tests/indexes/timedeltas/test_insert.py | 5 +- pandas/tests/indexing/test_coercion.py | 6 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/internals/test_internals.py | 2 +- 7 files changed, 51 insertions(+), 30 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index de0a246861961..015923c83f3fe 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -477,7 +477,7 @@ def _validate_fill_value(self, fill_value): f"Got '{str(fill_value)}'." ) try: - fill_value = self._validate_scalar(fill_value, msg) + fill_value = self._validate_scalar(fill_value) except TypeError as err: raise ValueError(msg) from err rv = self._unbox(fill_value) @@ -510,17 +510,16 @@ def _validate_shift_value(self, fill_value): return self._unbox(fill_value) - def _validate_scalar(self, value, msg: Optional[str] = None): + def _validate_scalar(self, value, allow_listlike: bool = False): """ Validate that the input value can be cast to our scalar_type. Parameters ---------- value : object - msg : str, optional. - Message to raise in TypeError on invalid input. - If not provided, `value` is cast to a str and used - as the message. + allow_listlike: bool, default False + When raising an exception, whether the message should say + listlike inputs are allowed. Returns ------- @@ -531,6 +530,7 @@ def _validate_scalar(self, value, msg: Optional[str] = None): try: value = self._scalar_from_string(value) except ValueError as err: + msg = self._validation_error_message(value, allow_listlike) raise TypeError(msg) from err elif is_valid_nat_for_dtype(value, self.dtype): @@ -542,12 +542,38 @@ def _validate_scalar(self, value, msg: Optional[str] = None): value = self._scalar_type(value) # type: ignore[call-arg] else: - if msg is None: - msg = str(value) + msg = self._validation_error_message(value, allow_listlike) raise TypeError(msg) return value + def _validation_error_message(self, value, allow_listlike: bool = False) -> str: + """ + Construct an exception message on validation error. + + Some methods allow only scalar inputs, while others allow either scalar + or listlike. + + Parameters + ---------- + allow_listlike: bool, default False + + Returns + ------- + str + """ + if allow_listlike: + msg = ( + f"value should be a '{self._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{type(value).__name__}' instead." + ) + else: + msg = ( + f"value should be a '{self._scalar_type.__name__}' or 'NaT'. " + f"Got '{type(value).__name__}' instead." + ) + return msg + def _validate_listlike(self, value, allow_object: bool = False): if isinstance(value, type(self)): return value @@ -584,9 +610,8 @@ def _validate_listlike(self, value, allow_object: bool = False): return value def _validate_searchsorted_value(self, value): - msg = "searchsorted requires compatible dtype or scalar" if not is_list_like(value): - value = self._validate_scalar(value, msg) + value = self._validate_scalar(value, True) else: value = self._validate_listlike(value) @@ -594,20 +619,15 @@ def _validate_searchsorted_value(self, value): return self._rebox_native(rv) def _validate_setitem_value(self, value): - msg = ( - f"'value' should be a '{self._scalar_type.__name__}', 'NaT', " - f"or array of those. Got '{type(value).__name__}' instead." - ) if is_list_like(value): value = self._validate_listlike(value) else: - value = self._validate_scalar(value, msg) + value = self._validate_scalar(value, True) return self._unbox(value, setitem=True) def _validate_insert_value(self, value): - msg = f"cannot insert {type(self).__name__} with incompatible label" - value = self._validate_scalar(value, msg) + value = self._validate_scalar(value) self._check_compatible_with(value, setitem=True) # TODO: if we dont have compat, should we raise or astype(object)? @@ -617,9 +637,8 @@ def _validate_insert_value(self, value): # to check for freq. def _validate_where_value(self, other): - msg = f"Where requires matching dtype, not {type(other)}" if not is_list_like(other): - other = self._validate_scalar(other, msg) + other = self._validate_scalar(other) else: other = self._validate_listlike(other) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index ed7c7c31c6b8d..3d34948018be4 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -399,7 +399,7 @@ def test_setitem_raises(self): with pytest.raises(IndexError, match="index 12 is out of bounds"): arr[12] = val - with pytest.raises(TypeError, match="'value' should be a.* 'object'"): + with pytest.raises(TypeError, match="value should be a.* 'object'"): arr[0] = object() msg = "cannot set using a list-like indexer with a different length" @@ -1032,7 +1032,7 @@ def test_casting_nat_setitem_array(array, casting_nats): ) def test_invalid_nat_setitem_array(array, non_casting_nats): msg = ( - "'value' should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. " + "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. " "Got '(timedelta64|datetime64|int)' instead." ) diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index b4f6cc3798f4f..d2c999f61b4bb 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -21,7 +21,8 @@ def test_insert_nat(self, tz, null): @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) def test_insert_invalid_na(self, tz): idx = DatetimeIndex(["2017-01-01"], tz=tz) - with pytest.raises(TypeError, match="incompatible label"): + msg = "value should be a 'Timestamp' or 'NaT'. Got 'timedelta64' instead." + with pytest.raises(TypeError, match=msg): idx.insert(0, np.timedelta64("NaT")) def test_insert_empty_preserves_freq(self, tz_naive_fixture): @@ -174,7 +175,7 @@ def test_insert_mismatched_types_raises(self, tz_aware_fixture, item): tz = tz_aware_fixture dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) - msg = "incompatible label" + msg = "value should be a 'Timestamp' or 'NaT'. Got '.*' instead" with pytest.raises(TypeError, match=msg): dti.insert(1, item) diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py index 1ebc0a4b1eca0..66fec2310e50c 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -79,7 +79,8 @@ def test_insert_nat(self, null): def test_insert_invalid_na(self): idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - with pytest.raises(TypeError, match="incompatible label"): + msg = r"value should be a 'Timedelta' or 'NaT'\. Got 'datetime64' instead\." + with pytest.raises(TypeError, match=msg): idx.insert(0, np.datetime64("NaT")) @pytest.mark.parametrize( @@ -89,7 +90,7 @@ def test_insert_mismatched_types_raises(self, item): # GH#33703 dont cast these to td64 tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - msg = "incompatible label" + msg = r"value should be a 'Timedelta' or 'NaT'\. Got '.*' instead\." with pytest.raises(TypeError, match=msg): tdi.insert(1, item) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 752ecd47fe089..ef62b454ccaaa 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -444,7 +444,7 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype): with pytest.raises(TypeError, match=msg): obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo")) - msg = "cannot insert DatetimeArray with incompatible label" + msg = "value should be a 'Timestamp' or 'NaT'. Got 'int' instead." with pytest.raises(TypeError, match=msg): obj.insert(1, 1) @@ -461,12 +461,12 @@ def test_insert_index_timedelta64(self): ) # ToDo: must coerce to object - msg = "cannot insert TimedeltaArray with incompatible label" + msg = "value should be a 'Timedelta' or 'NaT'. Got 'Timestamp' instead." with pytest.raises(TypeError, match=msg): obj.insert(1, pd.Timestamp("2012-01-01")) # ToDo: must coerce to object - msg = "cannot insert TimedeltaArray with incompatible label" + msg = "value should be a 'Timedelta' or 'NaT'. Got 'int' instead." with pytest.raises(TypeError, match=msg): obj.insert(1, 1) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 337ec683ee745..1fb5af67a64d8 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -335,7 +335,7 @@ def test_partial_set_invalid(self): df = orig.copy() # don't allow not string inserts - msg = "cannot insert DatetimeArray with incompatible label" + msg = r"value should be a 'Timestamp' or 'NaT'\. Got '.*' instead\." with pytest.raises(TypeError, match=msg): df.loc[100.0, :] = df.iloc[0] diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index cd2f5a903d8cc..896c2dead634f 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1095,7 +1095,7 @@ def test_datetime_block_can_hold_element(self): assert not block._can_hold_element(val) msg = ( - "'value' should be a 'Timestamp', 'NaT', " + "value should be a 'Timestamp', 'NaT', " "or array of those. Got 'date' instead." ) with pytest.raises(TypeError, match=msg): From 77f2a0d59f385e3672388fc592db0f34e6610257 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Oct 2020 15:56:47 -0700 Subject: [PATCH 2/4] CLN: remove now-unecessary argument --- pandas/core/arrays/datetimelike.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 015923c83f3fe..02c56840beefc 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -419,7 +419,7 @@ def _from_factorized(cls, values, original): # Validation Methods # TODO: try to de-duplicate these, ensure identical behavior - def _validate_comparison_value(self, other, opname: str): + def _validate_comparison_value(self, other): if isinstance(other, str): try: # GH#18435 strings get a pass from tzawareness compat @@ -867,7 +867,7 @@ def _cmp_method(self, other, op): return op(self.ravel(), other.ravel()).reshape(self.shape) try: - other = self._validate_comparison_value(other, f"__{op.__name__}__") + other = self._validate_comparison_value(other) except InvalidComparison: return invalid_comparison(self, other, op) From 7252e62568d1171383935128204520f21723715a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Oct 2020 16:18:08 -0700 Subject: [PATCH 3/4] pass allow_listlike=True in validate_where_value --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 02c56840beefc..34273b726092c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -638,7 +638,7 @@ def _validate_insert_value(self, value): def _validate_where_value(self, other): if not is_list_like(other): - other = self._validate_scalar(other) + other = self._validate_scalar(other, True) else: other = self._validate_listlike(other) From 65bfe24916a6e58788a48a701f0addee3e5eb841 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Oct 2020 08:25:51 -0700 Subject: [PATCH 4/4] mypy fixup --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index dfe1ed3ebd58b..7aa58e5268948 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -429,7 +429,7 @@ def _validate_comparison_value(self, other): raise InvalidComparison(other) if isinstance(other, self._recognized_scalars) or other is NaT: - other = self._scalar_type(other) # type: ignore[call-arg] + other = self._scalar_type(other) try: self._check_compatible_with(other) except TypeError as err: