From 25ba8080d37fc981f44bb5e561e37dbf6b3acba6 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 11 Oct 2022 15:04:24 +0100
Subject: [PATCH 1/2] guess nanoseconds

---
Reviewer note: _fill_token below was amended in review so that a token which
is not of the form <digits>.<digits> (for example a bare '.' date separator,
or a token with several dots) is zero-padded like any other token instead of
raising ValueError. The blob ids on the parsing.pyx "index" lines predate
that amendment.

 pandas/_libs/tslibs/parsing.pyx     | 20 ++++++++++++++++++--
 pandas/tests/tslibs/test_parsing.py | 14 ++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 51bb21404e7b5..b845cfbfaa5a5 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -976,7 +976,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
         (('hour',), '%H', 2),
         (('minute',), '%M', 2),
         (('second',), '%S', 2),
-        (('microsecond',), '%f', 6),
         (('second', 'microsecond'), '%S.%f', 0),
         (('tzinfo',), '%z', 0),
         (('tzinfo',), '%Z', 0),
@@ -1048,7 +1047,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
 
         parsed_formatted = parsed_datetime.strftime(attr_format)
         for i, token_format in enumerate(format_guess):
-            token_filled = tokens[i].zfill(padding)
+            token_filled = _fill_token(tokens[i], padding)
             if token_format is None and token_filled == parsed_formatted:
                 format_guess[i] = attr_format
                 tokens[i] = token_filled
@@ -1090,6 +1089,23 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
     else:
         return None
 
+cdef str _fill_token(token: str, padding: int):
+    if '.' not in token:
+        token_filled = token.zfill(padding)
+    else:
+        seconds, _, nanoseconds = token.partition('.')
+        if seconds.isdecimal() and nanoseconds.isdecimal():
+            # e.g. '00.123': zero-pad the seconds, right-pad the fraction
+            # to nanoseconds, then only take the first 6 digits
+            # (microseconds) as stdlib datetime doesn't support nanoseconds
+            seconds = f'{int(seconds):02d}'
+            nanoseconds = nanoseconds.ljust(9, '0')[:6]
+            token_filled = f'{seconds}.{nanoseconds}'
+        else:
+            # e.g. a bare '.' separator (from '27.03.2003') or '1.2.3':
+            # not a seconds.fraction token, so pad it like any other token
+            token_filled = token.zfill(padding)
+    return token_filled
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index e0166c876cdf5..7418d7811ded4 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -295,3 +295,17 @@ def test_is_iso_format(fmt, expected):
     # see gh-41047
     result = parsing.format_is_iso(fmt)
     assert result == expected
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        "2018-01-01T00:00:00.123456789",
+        "2018-01-01T00:00:00.123456",
+        "2018-01-01T00:00:00.123",
+    ],
+)
+def test_guess_datetiem_format_f(input):
+    result = parsing.guess_datetime_format(input)
+    expected = "%Y-%m-%dT%H:%M:%S.%f"
+    assert result == expected

From 291ff45d5f81088d456b2082b3d8f58e5864668c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 11 Oct 2022 15:40:35 +0100
Subject: [PATCH 2/2] add gh reference number

---
 pandas/_libs/tslibs/parsing.pyx     | 1 +
 pandas/core/tools/datetimes.py      | 2 +-
 pandas/tests/tslibs/test_parsing.py | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index b845cfbfaa5a5..9bd6eb91af8ef 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1090,6 +1090,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
         return None
 
 cdef str _fill_token(token: str, padding: int):
+    cdef str token_filled
     if '.' not in token:
         token_filled = token.zfill(padding)
     else:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index fe14f8e9907d6..18e926c7039b0 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -820,7 +820,7 @@ def to_datetime(
           to the day starting at noon on January 1, 4713 BC.
         - If Timestamp convertible (Timestamp, dt.datetime, np.datetimt64 or date
           string), origin is set to Timestamp identified by origin.
-        - If a float or integer, origin is the mullisecond difference
+        - If a float or integer, origin is the millisecond difference
           relative to 1970-01-01.
     cache : bool, default True
         If :const:`True`, use a cache of unique, converted dates to apply the
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index 7418d7811ded4..9588f54388d1e 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -305,7 +305,8 @@ def test_is_iso_format(fmt, expected):
         "2018-01-01T00:00:00.123",
     ],
 )
-def test_guess_datetiem_format_f(input):
+def test_guess_datetime_format_f(input):
+    # https://github.com/pandas-dev/pandas/issues/49043
     result = parsing.guess_datetime_format(input)
     expected = "%Y-%m-%dT%H:%M:%S.%f"
     assert result == expected