From 929272889bcdecae17e36aa30a692e9844580c40 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 12:05:00 +0200 Subject: [PATCH 1/6] Fix parsing of ISO8601 strings with empty period --- pandas/_libs/tslibs/timedeltas.pyx | 11 ++++------- pandas/tests/scalar/timedelta/test_constructors.py | 2 ++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ee32ed53a908b..a131e89419fc1 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -620,10 +620,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: if not len(unit): number.append(c) else: - # if in days, pop trailing T - if unit[-1] == 'T': - unit.pop() - elif 'H' in unit or 'M' in unit: + if 'H' in unit or 'M' in unit: if len(number) > 2: raise ValueError(err_msg) r = timedelta_from_spec(number, '0', unit) @@ -632,14 +629,14 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: neg = 0 unit, number = [], [c] else: - if c == 'P': - pass # ignore leading character + if c == 'P' or c == 'T': + pass # ignore marking characters P and T elif c == '-': if neg or have_value: raise ValueError(err_msg) else: neg = 1 - elif c in ['D', 'T', 'H', 'M']: + elif c in ['D', 'H', 'M']: unit.append(c) elif c == '.': # append any seconds diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 23fb25b838da6..03c3a1d92ea1d 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -228,6 +228,8 @@ def test_overflow_on_construction(): ("P0DT0H0M0.001S", Timedelta(milliseconds=1)), ("P0DT0H1M0S", Timedelta(minutes=1)), ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), + ("PT1S", Timedelta(seconds=1)), + ("PT0S", Timedelta(seconds=0)), ], ) def test_iso_constructor(fmt, exp): From d8e8ad9f028e627704418456e36f94f4e6774e42 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 12:14:47 +0200 Subject: [PATCH 2/6] Add 'W' as a valid ISO 8601 designator --- pandas/_libs/tslibs/timedeltas.pyx | 4 ++-- pandas/tests/scalar/timedelta/test_constructors.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a131e89419fc1..f7730487f52bb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -620,7 +620,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: if not len(unit): number.append(c) else: - if 'H' in unit or 'M' in unit: + if 'H' in unit or 'M' in unit or 'W' in unit: if len(number) > 2: raise ValueError(err_msg) r = timedelta_from_spec(number, '0', unit) @@ -636,7 +636,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: raise ValueError(err_msg) else: neg = 1 - elif c in ['D', 'H', 'M']: + elif c in ['W', 'D', 'H', 'M']: unit.append(c) elif c == '.': # append any seconds diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 03c3a1d92ea1d..f074566f66c30 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -230,6 +230,7 @@ def test_overflow_on_construction(): ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), ("PT1S", Timedelta(seconds=1)), ("PT0S", Timedelta(seconds=0)), + ("P1WT0S", Timedelta(days=7, seconds=0)), ], ) def test_iso_constructor(fmt, exp): From fbf93daf7f64c66db72280f4b63176171a0d3366 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 13:19:40 +0200 Subject: [PATCH 3/6] Parse all components of ISO 8601 durations, even if S is not included --- pandas/_libs/tslibs/timedeltas.pyx | 10 +++++++--- pandas/tests/scalar/timedelta/test_constructors.py | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7730487f52bb..2e7190d3f84d0 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -620,9 +620,6 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: if not len(unit): number.append(c) else: - if 'H' in unit or 'M' in unit or 'W' in unit: - if len(number) > 2: - raise ValueError(err_msg) r = timedelta_from_spec(number, '0', unit) result += timedelta_as_neg(r, neg) @@ -638,6 +635,13 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: neg = 1 elif c in ['W', 'D', 'H', 'M']: unit.append(c) + if c in ['H', 'M'] and len(number) > 2: + raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [] elif c == '.': # append any seconds if len(number): diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index f074566f66c30..70ddff35d6e2a 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -231,6 +231,9 @@ def test_overflow_on_construction(): ("PT1S", Timedelta(seconds=1)), ("PT0S", Timedelta(seconds=0)), ("P1WT0S", Timedelta(days=7, seconds=0)), + ("P1D", Timedelta(days=1)), + ("P1DT1H", Timedelta(days=1, hours=1)), + ("P1W", Timedelta(days=7)), ], ) def test_iso_constructor(fmt, exp): From 949fa5ed7971742989ba80cb0acf82b1eea859f7 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 13:21:06 +0200 Subject: [PATCH 4/6] Use chained comparison --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2e7190d3f84d0..c369ed75a550b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -604,7 +604,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: for c in ts: # number (ascii codes) - if ord(c) >= 48 and ord(c) <= 57: + if 48 <= ord(c) <= 57: have_value = 1 if have_dot: From 0511e016decdd5b4ca094a4623982d7eba1369bd Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 13:34:54 +0200 Subject: [PATCH 5/6] Add what's new entry --- doc/source/whatsnew/v1.2.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9fc094330fb36..e23871a925570 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -348,8 +348,7 @@ Datetimelike Timedelta ^^^^^^^^^ - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) -- -- +- Bug in parsing of ISO 8601 durations in :class:`Timedelta` and :func:`to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`) Timezones ^^^^^^^^^ From 3786f8b1cb31afa3e6cd0923e86fa0996158c40a Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 16 Oct 2020 14:11:33 +0200 Subject: [PATCH 6/6] Remove limitation that seconds has two or fewer digits - ISO 8601 doesn't seem to express that explicit limitations here are necessary. --- pandas/_libs/tslibs/timedeltas.pyx | 7 ++----- pandas/tests/scalar/timedelta/test_constructors.py | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c369ed75a550b..c6b47d09cf0bd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -662,11 +662,8 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: r = timedelta_from_spec(number, '0', dec_unit) result += timedelta_as_neg(r, neg) else: # seconds - if len(number) <= 2: - r = timedelta_from_spec(number, '0', 'S') - result += timedelta_as_neg(r, neg) - else: - raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) else: raise ValueError(err_msg) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 70ddff35d6e2a..06bdb8a6cf0a2 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -234,6 +234,8 @@ def test_overflow_on_construction(): ("P1D", Timedelta(days=1)), ("P1DT1H", Timedelta(days=1, hours=1)), ("P1W", Timedelta(days=7)), + ("PT300S", Timedelta(seconds=300)), + ("P1DT0H0M00000000000S", Timedelta(days=1)), ], ) def test_iso_constructor(fmt, exp): @@ -247,7 +249,6 @@ def test_iso_constructor(fmt, exp): "PDTHMS", "P0DT999H999M999S", "P1DT0H0M0.0000000000000S", - "P1DT0H0M00000000000S", "P1DT0H0M0.S", ], )