From ece009e882aaf94fa4090e52f558c5d70fc5e2e7 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Tue, 19 Jul 2022 16:52:34 -0700 Subject: [PATCH 1/7] BUG: fixed OutOfBoundsDatetime exception when errors=coerce #45319 --- pandas/core/tools/datetimes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d4d61df915acb..aa75c05e3695c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -227,7 +227,11 @@ def _maybe_cache( unique_dates = unique(arg) if len(unique_dates) < len(arg): cache_dates = convert_listlike(unique_dates, format) - cache_array = Series(cache_dates, index=unique_dates) + # GH#45319 + try: + cache_array = Series(cache_dates, index=unique_dates) + except OutOfBoundsDatetime: + pass # GH#39882 and GH#35888 in case of None and NaT we get duplicates if not cache_array.index.is_unique: cache_array = cache_array[~cache_array.index.duplicated()] From 75e8fee1d99a73618622c935a346236aa2f841ce Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Mon, 25 Jul 2022 12:45:20 -0700 Subject: [PATCH 2/7] BUG: Added test and release note #45319 --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/tools/test_to_datetime.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 090fea57872c5..de1f930cfea5d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -827,7 +827,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) -- +- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index afa06bf1a79af..a88840b13a9ce 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2773,3 +2773,23 @@ def test_to_datetime_monotonic_increasing_index(cache): result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "input", + [ + Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * 40) + ), + Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * 50) + ), + ], +) +def test_to_datetime_cache_coerce_50_lines(input): + # GH#45319 + result = to_datetime(input, errors="coerce", utc=True) + + assert result[0] is NaT From f17f3583d6137bd28fb77b669d4fe6c3c690d724 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Thu, 28 Jul 2022 21:00:51 -0700 Subject: [PATCH 3/7] BUG: Restructured test parameters #45319 --- pandas/tests/tools/test_to_datetime.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a88840b13a9ce..6eb1167815cbd 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2776,20 +2776,15 @@ def test_to_datetime_monotonic_increasing_index(cache): @pytest.mark.parametrize( - "input", - [ - Series( - [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] - + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * 40) - ), - Series( - [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] - + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * 50) - ), - ], + "series_length", + range(40, start_caching_at + 5), ) -def test_to_datetime_cache_coerce_50_lines(input): +def test_to_datetime_cache_coerce_50_lines(series_length): # GH#45319 - result = to_datetime(input, errors="coerce", utc=True) + s = Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) + ) + result = to_datetime(s, errors="coerce", utc=True) assert result[0] is NaT From 57aff1152b89e53a554d5cd8e10e71e2b7bb90cc Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Wed, 3 Aug 2022 16:39:51 -0700 Subject: [PATCH 4/7] BUG: Restructured test #45319 --- pandas/tests/tools/test_to_datetime.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 6eb1167815cbd..bad25dc7a7104 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2787,4 +2787,8 @@ def test_to_datetime_cache_coerce_50_lines(series_length): ) result = to_datetime(s, errors="coerce", utc=True) - assert result[0] is NaT + expected = Series( + [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length) + ) + + tm.assert_series_equal(result, expected) From 4ea55b1f732876361aa21798964821485df18425 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Tue, 9 Aug 2022 13:50:49 -0700 Subject: [PATCH 5/7] BUG: Restructured parameters for test #45319 --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index bad25dc7a7104..4d454bf0c9ebc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2777,7 +2777,7 @@ def test_to_datetime_monotonic_increasing_index(cache): @pytest.mark.parametrize( "series_length", - range(40, start_caching_at + 5), + [40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)], ) def test_to_datetime_cache_coerce_50_lines(series_length): # GH#45319 From b98f2eb41872dc69f6dadd723b8d26c6982b1851 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Wed, 10 Aug 2022 10:32:50 -0700 Subject: [PATCH 6/7] BUG: Renamed test and added raise and ignore cases #45319 --- pandas/tests/tools/test_to_datetime.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4d454bf0c9ebc..76cbcc71e8217 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2779,16 +2779,28 @@ def test_to_datetime_monotonic_increasing_index(cache): "series_length", [40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)], ) -def test_to_datetime_cache_coerce_50_lines(series_length): +def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length): # GH#45319 s = Series( [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) ) - result = to_datetime(s, errors="coerce", utc=True) + result1 = to_datetime(s, errors="coerce", utc=True) - expected = Series( + expected1 = Series( [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length) ) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result1, expected1) + + result2 = to_datetime(s, errors="ignore", utc=True) + + expected2 = Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) + ) + + tm.assert_series_equal(result2, expected2) + + with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"): + to_datetime(s, errors="raise", utc=True) From 9c13e420b3bdcb03c7d2e3544b9b0394db20887e Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Thu, 11 Aug 2022 09:38:35 -0700 Subject: [PATCH 7/7] BUG: Changed exception case #45319 --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ceac1d4a0f50b..782803b22f905 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -231,7 +231,7 @@ def _maybe_cache( try: cache_array = Series(cache_dates, index=unique_dates) except OutOfBoundsDatetime: - pass + return cache_array # GH#39882 and GH#35888 in case of None and NaT we get duplicates if not cache_array.index.is_unique: cache_array = cache_array[~cache_array.index.duplicated()]