From 2004fb7704ecd71a46ccf5ad6a21126895734151 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 4 Dec 2022 13:10:48 +0000 Subject: [PATCH 1/7] remove ymd special-path --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslib.pyx | 20 ++--- pandas/_libs/tslibs/parsing.pyi | 5 -- pandas/_libs/tslibs/parsing.pyx | 21 +---- pandas/core/tools/datetimes.py | 101 +------------------------ pandas/tests/tools/test_to_datetime.py | 24 +++--- pandas/tests/tslibs/test_parsing.py | 2 +- 7 files changed, 25 insertions(+), 149 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d8609737b8c7a..5e44808c4ed4f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -659,6 +659,7 @@ Datetimelike - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with non-ISO8601 ``format`` (:issue:`49298`) +- Bug in :func:`to_datetime` with ``exact`` and ``format=%Y%m%d`` wasn't raising if the input didn't match the format (:issue:`50051`) - Timedelta diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b78174483be51..ea2fd2ff8a39c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -546,17 +546,10 @@ cpdef array_to_datetime( seen_datetime = True iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) - elif is_integer_object(val) or is_float_object(val): - if require_iso8601: - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError( - f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\"" - ) - return values, tz_out + elif ( + (is_integer_object(val) or is_float_object(val)) + and format is None + ): # these must be ns unit by-definition seen_integer = True @@ -575,7 +568,10 @@ cpdef array_to_datetime( except OverflowError: iresult[i] = NPY_NAT - elif isinstance(val, str): + elif ( + (is_integer_object(val) or is_float_object(val)) + or isinstance(val, str) + ): # string if type(val) is not str: # GH#32264 np.str_ object diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index db1388672b37c..d4287622ab0ab 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -27,11 +27,6 @@ def try_parse_dates( dayfirst: bool = ..., default: datetime | None = ..., ) -> npt.NDArray[np.object_]: ... -def try_parse_year_month_day( - years: npt.NDArray[np.object_], # object[:] - months: npt.NDArray[np.object_], # object[:] - days: npt.NDArray[np.object_], # object[:] -) -> npt.NDArray[np.object_]: ... def try_parse_datetime_components( years: npt.NDArray[np.object_], # object[:] months: npt.NDArray[np.object_], # object[:] diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 25a2722c48bd6..47a678a3f2bbc 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -744,25 +744,6 @@ def try_parse_dates( return result.base # .base to access underlying ndarray -def try_parse_year_month_day( - object[:] years, object[:] months, object[:] days -) -> np.ndarray: - cdef: - Py_ssize_t i, n - object[::1] result - - n = len(years) - # TODO(cython3): Use len instead of `shape[0]` - if months.shape[0] != n or days.shape[0] != n: - raise ValueError("Length of years/months/days must all be equal") - result = np.empty(n, dtype="O") - - for i in range(n): - result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) - - return result.base # .base to access underlying ndarray - - def try_parse_datetime_components(object[:] years, object[:] months, object[:] days, @@ -890,7 +871,7 @@ def format_is_iso(f: str) -> bint: but must be consistent. Leading 0s in dates and times are optional. """ iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format - excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] + excluded_formats = ["%Y%m", "%Y"] for date_sep in [" ", "/", "\\", "-", ".", ""]: for time_sep in [" ", "T"]: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 430343beb630b..f3feab71a4d3c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -24,8 +24,6 @@ Timedelta, Timestamp, iNaT, - nat_strings, - parsing, timezones as libtimezones, ) from pandas._libs.tslibs.parsing import ( @@ -38,7 +36,6 @@ AnyArrayLike, ArrayLike, DateTimeErrorChoices, - npt, ) from pandas.core.dtypes.common import ( @@ -57,13 +54,11 @@ ABCDataFrame, ABCSeries, ) -from pandas.core.dtypes.missing import notna from pandas.arrays import ( DatetimeArray, IntegerArray, ) -from pandas.core import algorithms from pandas.core.algorithms import unique from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.datetimes import ( @@ -407,7 +402,6 @@ def _convert_listlike_datetimes( # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation - orig_arg = arg try: arg, _ = maybe_convert_dtype(arg, copy=False, tz=libtimezones.maybe_get_tz(tz)) except TypeError: @@ -435,8 +429,8 @@ def _convert_listlike_datetimes( require_iso8601 = not infer_datetime_format if format is not None and not require_iso8601: - res = _to_datetime_with_format( - arg, orig_arg, name, utc, format, exact, errors, infer_datetime_format + res = _array_strptime_with_fallback( + arg, name, utc, format, exact, errors, infer_datetime_format ) if res is not None: return res @@ -510,43 +504,6 @@ def _array_strptime_with_fallback( return _box_as_indexlike(result, utc=utc, name=name) -def _to_datetime_with_format( - arg, - orig_arg, - name, - utc: bool, - fmt: str, - exact: bool, - errors: str, - infer_datetime_format: bool, -) -> Index | None: - """ - Try parsing with the given format, returning None on failure. - """ - result = None - - # shortcut formatting here - if fmt == "%Y%m%d": - # pass orig_arg as float-dtype may have been converted to - # datetime64[ns] - orig_arg = ensure_object(orig_arg) - try: - # may return None without raising - result = _attempt_YYYYMMDD(orig_arg, errors=errors) - except (ValueError, TypeError, OutOfBoundsDatetime) as err: - raise ValueError( - "cannot convert the input to '%Y%m%d' date format" - ) from err - if result is not None: - return _box_as_indexlike(result, utc=utc, name=name) - - # fallback - res = _array_strptime_with_fallback( - arg, name, utc, fmt, exact, errors, infer_datetime_format - ) - return res - - def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: """ to_datetime specalized to the case where a 'unit' is passed. @@ -1244,60 +1201,6 @@ def coerce(values): return values -def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None: - """ - try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, - arg is a passed in as an object dtype, but could really be ints/strings - with nan-like/or floats (e.g. with nan) - - Parameters - ---------- - arg : np.ndarray[object] - errors : {'raise','ignore','coerce'} - """ - - def calc(carg): - # calculate the actual result - carg = carg.astype(object, copy=False) - parsed = parsing.try_parse_year_month_day( - carg / 10000, carg / 100 % 100, carg % 100 - ) - return tslib.array_to_datetime(parsed, errors=errors)[0] - - def calc_with_mask(carg, mask): - result = np.empty(carg.shape, dtype="M8[ns]") - iresult = result.view("i8") - iresult[~mask] = iNaT - - masked_result = calc(carg[mask].astype(np.float64).astype(np.int64)) - result[mask] = masked_result.astype("M8[ns]") - return result - - # try intlike / strings that are ints - try: - return calc(arg.astype(np.int64)) - except (ValueError, OverflowError, TypeError): - pass - - # a float with actual np.nan - try: - carg = arg.astype(np.float64) - return calc_with_mask(carg, notna(carg)) - except (ValueError, OverflowError, TypeError): - pass - - # string with NaN-like - try: - # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected - # "Union[Union[ExtensionArray, ndarray], Index, Series]" - mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type] - return calc_with_mask(arg, mask) - except (ValueError, OverflowError, TypeError): - pass - - return None - - __all__ = [ "DateParseError", "should_cache", diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7df45975475dd..f94d5e56c4b8c 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -125,24 +125,21 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): expected[2] = np.nan ser[2] = np.nan - result = to_datetime(ser, format="%Y%m%d", cache=cache) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=None): + to_datetime(ser, format="%Y%m%d", cache=cache) # string with NaT ser2 = ser.apply(str) ser2[2] = "nat" - result = to_datetime(ser2, format="%Y%m%d", cache=cache) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=None): + to_datetime(ser2, format="%Y%m%d", cache=cache) def test_to_datetime_format_YYYYMMDD_ignore(self, cache): # coercion # GH 7930 ser = Series([20121231, 20141231, 99991231]) + expected = Series([20121231, 20141231, 99991231], dtype=object) result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache) - expected = Series( - [datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], - dtype=object, - ) tm.assert_series_equal(result, expected) def test_to_datetime_format_YYYYMMDD_coercion(self, cache): @@ -249,7 +246,7 @@ def test_to_datetime_format_integer(self, cache): # valid date, length == 8 [20121030, datetime(2012, 10, 30)], # short valid date, length == 6 - [199934, datetime(1999, 3, 4)], + [199934, 199934], # long integer date partially parsed to datetime(2012,1,1), length > 8 [2012010101, 2012010101], # invalid date partially parsed to datetime(2012,9,9), length == 8 @@ -1714,8 +1711,8 @@ def test_dataframe_coerce(self, cache): df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) msg = ( - "cannot assemble the datetimes: time data .+ does not " - r"match format '%Y%m%d' \(match\)" + r"cannot assemble the datetimes: time data .+ doesn't " + r'match format "%Y%m%d"' ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @@ -1791,7 +1788,10 @@ def test_dataframe_mixed(self, cache): def test_dataframe_float(self, cache): # float df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) - msg = "cannot assemble the datetimes: unconverted data remains: 1" + msg = ( + r'cannot assemble the datetimes: time data "20000151" at ' + r'position 0 doesn\'t match format "%Y%m%d"' + ) with pytest.raises(ValueError, match=msg): to_datetime(df, cache=cache) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 4d7501cdadcd9..6e03348345408 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -288,7 +288,7 @@ def test_parse_time_string_check_instance_type_raise_exception(): ("%Y-%m-%dT%H:%M:%S.%f", True), ("%Y-%m-%dT%H:%M:%S.%f%z", True), ("%Y-%m-%dT%H:%M:%S.%f%Z", False), - ("%Y%m%d", False), + ("%Y%m%d", True), ("%Y%m", False), ("%Y", False), ("%Y-%m-%d", True), From 83a4ec641521b7993748b94eb963aaa672cebcd6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 4 Dec 2022 16:29:27 +0000 Subject: [PATCH 2/7] fix doctest --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index f3feab71a4d3c..417e9b4bfc45a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -933,7 +933,7 @@ def to_datetime( in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') - datetime.datetime(1300, 1, 1, 0, 0) + '13000101' >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT From 69a487a36831dd752581568201325052f38b0361 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 5 Dec 2022 09:31:26 +0000 Subject: [PATCH 3/7] update whatsnew --- doc/source/whatsnew/v2.0.0.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0c870784c948d..b481729e94e95 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -636,6 +636,7 @@ Performance improvements - Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`) - Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`) - Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`) +- Performance improvement in :func:`to_datetime` with ``format='%Y%m%d'`` (:issue:`17410`) .. --------------------------------------------------------------------------- .. _whatsnew_200.bug_fixes: @@ -659,8 +660,9 @@ Datetimelike - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with non-ISO8601 ``format`` (:issue:`49298`) -- Bug in :func:`to_datetime` with ``exact`` and ``format=%Y%m%d`` wasn't raising if the input didn't match the format (:issue:`50051`) +- Bug in :func:`to_datetime` with ``exact`` and ``format='%Y%m%d'`` wasn't raising if the input didn't match the format (:issue:`50051`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime``, or ``np.datetime64`` objects with non-ISO8601 ``format`` (:issue:`49298`, :issue:`50036`) +- Bug in :func:`to_datetime` with ``errors='ignore'`` and ``format='%Y%m%d'`` was returning out-of-bounds inputs as ``datetime.datetime`` objects instead of returning the input (:issue:`50054`) - Timedelta From a004790e2d782e189b7226ad8e37c782e7817282 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 5 Dec 2022 12:52:34 +0000 Subject: [PATCH 4/7] use slow path --- doc/source/whatsnew/v2.0.0.rst | 1 - pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/tests/tools/test_to_datetime.py | 11 ++++------- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b481729e94e95..41bdf612234ac 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -636,7 +636,6 @@ Performance improvements - Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`) - Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`) - Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`) -- Performance improvement in :func:`to_datetime` with ``format='%Y%m%d'`` (:issue:`17410`) .. --------------------------------------------------------------------------- .. _whatsnew_200.bug_fixes: diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 47a678a3f2bbc..cdccccf1f0a5d 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -871,7 +871,7 @@ def format_is_iso(f: str) -> bint: but must be consistent. Leading 0s in dates and times are optional. """ iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format - excluded_formats = ["%Y%m", "%Y"] + excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] for date_sep in [" ", "/", "\\", "-", ".", ""]: for time_sep in [" ", "T"]: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index aa83e2d389a7e..229da46bd7119 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -246,7 +246,7 @@ def test_to_datetime_format_integer(self, cache): # valid date, length == 8 [20121030, datetime(2012, 10, 30)], # short valid date, length == 6 - [199934, 199934], + [199934, datetime(1999, 3, 4)], # long integer date partially parsed to datetime(2012,1,1), length > 8 [2012010101, 2012010101], # invalid date partially parsed to datetime(2012,9,9), length == 8 @@ -1724,8 +1724,8 @@ def test_dataframe_coerce(self, cache): df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) msg = ( - r"cannot assemble the datetimes: time data .+ doesn't " - r'match format "%Y%m%d"' + "cannot assemble the datetimes: time data .+ does not " + r"match format '%Y%m%d' \(match\)" ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @@ -1801,10 +1801,7 @@ def test_dataframe_mixed(self, cache): def test_dataframe_float(self, cache): # float df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) - msg = ( - r'cannot assemble the datetimes: time data "20000151" at ' - r'position 0 doesn\'t match format "%Y%m%d"' - ) + msg = "cannot assemble the datetimes: unconverted data remains: 1" with pytest.raises(ValueError, match=msg): to_datetime(df, cache=cache) From ad8a56afd93fb0c2af68ff42f0f3d10156c8b7b5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 5 Dec 2022 13:02:04 +0000 Subject: [PATCH 5/7] fixup post-merge --- doc/source/whatsnew/v2.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 41bdf612234ac..d82fff7de2493 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -658,7 +658,6 @@ Datetimelike - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) -- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with non-ISO8601 ``format`` (:issue:`49298`) - Bug in :func:`to_datetime` with ``exact`` and ``format='%Y%m%d'`` wasn't raising if the input didn't match the format (:issue:`50051`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime``, or ``np.datetime64`` objects with non-ISO8601 ``format`` (:issue:`49298`, :issue:`50036`) - Bug in :func:`to_datetime` with ``errors='ignore'`` and ``format='%Y%m%d'`` was returning out-of-bounds inputs as ``datetime.datetime`` objects instead of returning the input (:issue:`50054`) From 6e8280b45f4ffe891b6476065fa9a2f8832889e3 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 5 Dec 2022 13:07:38 +0000 Subject: [PATCH 6/7] simplify --- pandas/_libs/tslib.pyx | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3364f1536df87..35a4131d11d50 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -550,10 +550,17 @@ cpdef array_to_datetime( seen_datetime = True iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) - elif ( - (is_integer_object(val) or is_float_object(val)) - and format is None - ): + elif is_integer_object(val) or is_float_object(val): + if require_iso8601: + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError( + f"time data \"{val}\" at position {i} doesn't " + f"match format \"{format}\"" + ) + return values, tz_out # these must be ns unit by-definition seen_integer = True @@ -572,10 +579,7 @@ cpdef array_to_datetime( except OverflowError: iresult[i] = NPY_NAT - elif ( - (is_integer_object(val) or is_float_object(val)) - or isinstance(val, str) - ): + elif isinstance(val, str): # string if type(val) is not str: # GH#32264 np.str_ object From 55dc073412db33c57e8971850c675675e5762b23 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 5 Dec 2022 13:08:33 +0000 Subject: [PATCH 7/7] clean up --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/tools/test_to_datetime.py | 3 ++- pandas/tests/tslibs/test_parsing.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 37e8c028ddffc..b9b955acb757f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -660,8 +660,8 @@ Datetimelike - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) -- Bug in :func:`to_datetime` with ``exact`` and ``format='%Y%m%d'`` wasn't raising if the input didn't match the format (:issue:`50051`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime``, or ``np.datetime64`` objects with non-ISO8601 ``format`` (:issue:`49298`, :issue:`50036`) +- Bug in :func:`to_datetime` with ``exact`` and ``format='%Y%m%d'`` wasn't raising if the input didn't match the format (:issue:`50051`) - Bug in :func:`to_datetime` with ``errors='ignore'`` and ``format='%Y%m%d'`` was returning out-of-bounds inputs as ``datetime.datetime`` objects instead of returning the input (:issue:`50054`) - diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 229da46bd7119..5046b7adc8aef 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -117,6 +117,7 @@ def test_to_datetime_format_YYYYMMDD(self, cache): tm.assert_series_equal(result, expected) def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): + # GH50051 ser = Series([19801222, 19801222] + [19810105] * 5) # with NaT expected = Series( @@ -136,7 +137,7 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): def test_to_datetime_format_YYYYMMDD_ignore(self, cache): # coercion - # GH 7930 + # GH 7930, GH50054 ser = Series([20121231, 20141231, 99991231]) expected = Series([20121231, 20141231, 99991231], dtype=object) result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 6e03348345408..4d7501cdadcd9 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -288,7 +288,7 @@ def test_parse_time_string_check_instance_type_raise_exception(): ("%Y-%m-%dT%H:%M:%S.%f", True), ("%Y-%m-%dT%H:%M:%S.%f%z", True), ("%Y-%m-%dT%H:%M:%S.%f%Z", False), - ("%Y%m%d", True), + ("%Y%m%d", False), ("%Y%m", False), ("%Y", False), ("%Y-%m-%d", True),