From 180060070f0cb0eaa581b1016a65fde528220e61 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Feb 2021 19:57:32 -0800 Subject: [PATCH 1/3] Better exception message in to_timedelta --- pandas/_libs/tslibs/timedeltas.pyx | 6 +++++- pandas/core/dtypes/cast.py | 2 +- pandas/tests/indexes/datetimes/test_indexing.py | 3 ++- pandas/tests/tools/test_to_timedelta.py | 3 ++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9ebabd704475b..76a5b6cc9de12 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -347,9 +347,13 @@ def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="rais for i in range(n): try: result[i] = convert_to_timedelta64(values[i], parsed_unit) - except ValueError: + except ValueError as err: if errors == 'coerce': result[i] = NPY_NAT + elif "unit abbreviation w/o a number" in str(err): + # re-raise with more pertinent message + msg = f"Could not convert '{values[i]}' to NumPy timedelta" + raise ValueError(msg) from err else: raise diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 531d784925e9d..8282a0c4353e1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1591,7 +1591,7 @@ def maybe_cast_to_datetime( value = to_timedelta(value, errors="raise")._values except OutOfBoundsDatetime: raise - except (ValueError, TypeError): + except ValueError: pass # coerce datetimelike to object diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 819ec52e1a52f..c65d9098a86a4 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -617,7 +617,8 @@ def test_get_indexer(self): pd.Timedelta("1 hour").to_timedelta64(), "foo", ] - with pytest.raises(ValueError, match="abbreviation w/o a number"): + msg = "Could not convert 'foo' to NumPy timedelta" + with pytest.raises(ValueError, match=msg): idx.get_indexer(target, "nearest", tolerance=tol_bad) with pytest.raises(ValueError, match="abbreviation w/o a number"): idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 6ff14087e6259..9b1d679d22f60 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -108,9 +108,10 @@ def test_to_timedelta_invalid(self): to_timedelta(time(second=1)) assert to_timedelta(time(second=1), errors="coerce") is pd.NaT - msg = "unit abbreviation w/o a number" + msg = "Could not convert 'foo' to NumPy timedelta" with pytest.raises(ValueError, match=msg): to_timedelta(["foo", "bar"]) + tm.assert_index_equal( TimedeltaIndex([pd.NaT, pd.NaT]), to_timedelta(["foo", "bar"], errors="coerce"), From 887178f5cbdd525ae820d9e60388477722c1872a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Feb 2021 09:52:37 -0800 Subject: [PATCH 2/3] catch less in dtypes.cast --- pandas/core/dtypes/cast.py | 18 ++++++++++++------ pandas/tests/frame/test_constructors.py | 2 ++ pandas/tests/series/test_constructors.py | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8282a0c4353e1..c8b32349e22c9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -26,6 +26,7 @@ ) import warnings +from dateutil.parser import ParserError import numpy as np from pandas._libs import ( @@ -1315,11 +1316,7 @@ def convert_dtypes( if ( convert_string or convert_integer or convert_boolean or convert_floating ) and not is_extension: - try: - inferred_dtype = lib.infer_dtype(input_array) - except ValueError: - # Required to catch due to Period. Can remove once GH 23553 is fixed - inferred_dtype = input_array.dtype + inferred_dtype = lib.infer_dtype(input_array) if not convert_string and is_string_dtype(inferred_dtype): inferred_dtype = input_array.dtype @@ -1591,8 +1588,17 @@ def maybe_cast_to_datetime( value = to_timedelta(value, errors="raise")._values except OutOfBoundsDatetime: raise - except ValueError: + except ParserError: + # Note: ParserError subclasses ValueError + # str that we can't parse to datetime pass + except ValueError as err: + if "mixed datetimes and integers in passed array" in str(err): + # equiv: going through construct_1d_ndarray_preserving_na + value = np.array(value, dtype=dtype) + # TODO: just let array_to_datetime handle that? + else: + raise # coerce datetimelike to object elif is_datetime64_dtype( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 14adc8a992609..79918ee1fb1b2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -85,6 +85,8 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): arr = arr.reshape(1, 1) msg = "Could not convert object to NumPy timedelta" + if frame_or_series is Series: + msg = "Invalid type for timedelta scalar: " with pytest.raises(ValueError, match=msg): frame_or_series(arr, dtype="m8[ns]") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c2d0bf5975059..c952cbcee2dbc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1332,7 +1332,7 @@ def test_constructor_dtype_timedelta64(self): td.astype("int32") # this is an invalid casting - msg = "Could not convert object to NumPy timedelta" + msg = "Could not convert 'foo' to NumPy timedelta" with pytest.raises(ValueError, match=msg): Series([timedelta(days=1), "foo"], dtype="m8[ns]") From 5062be0c49488c97f407c34ec175e8efd2b630f3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Feb 2021 15:38:20 -0800 Subject: [PATCH 3/3] be more careful --- pandas/core/dtypes/cast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c8b32349e22c9..6a0455e0b4cd6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1594,9 +1594,11 @@ def maybe_cast_to_datetime( pass except ValueError as err: if "mixed datetimes and integers in passed array" in str(err): - # equiv: going through construct_1d_ndarray_preserving_na - value = np.array(value, dtype=dtype) - # TODO: just let array_to_datetime handle that? + # array_to_datetime does not allow this; + # when called from _try_cast, this will be followed + # by a call to construct_1d_ndarray_preserving_na + # which will convert these + pass else: raise