-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Datetime parsing (PDEP-4): allow mixture of ISO formatted strings #50939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
044948f
f4e1392
9f06d80
d7f6056
8952a0e
6e6d579
3d65dbf
b247bbd
2f66f87
eb36d8c
262be89
e01b6ee
4a61e6a
607c77d
531e0e8
5582882
57b922c
313003e
2ede506
3b61e5b
acd44ae
ba6393f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,6 +152,7 @@ cdef dict _parse_code_table = {"y": 0, | |
def array_strptime( | ||
ndarray[object] values, | ||
str fmt, | ||
bint fmt_inferred=False, | ||
bint exact=True, | ||
errors="raise", | ||
bint utc=False, | ||
|
@@ -186,6 +187,7 @@ def array_strptime( | |
bint iso_format = format_is_iso(fmt) | ||
NPY_DATETIMEUNIT out_bestunit | ||
int out_local = 0, out_tzoffset = 0 | ||
bint string_to_dts_succeeded = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any particular reason this was changed from `string_to_dts_failed` to `string_to_dts_succeeded`? It doesn't really matter to me, I'm just curious. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It just simplifies the logic. |
||
|
||
assert is_raise or is_ignore or is_coerce | ||
|
||
|
@@ -306,43 +308,55 @@ def array_strptime( | |
else: | ||
val = str(val) | ||
|
||
if iso_format: | ||
string_to_dts_failed = string_to_dts( | ||
if fmt == "ISO8601": | ||
string_to_dts_succeeded = not string_to_dts( | ||
val, &dts, &out_bestunit, &out_local, | ||
&out_tzoffset, False, None, False | ||
) | ||
elif iso_format: | ||
string_to_dts_succeeded = not string_to_dts( | ||
val, &dts, &out_bestunit, &out_local, | ||
&out_tzoffset, False, fmt, exact | ||
) | ||
if not string_to_dts_failed: | ||
# No error reported by string_to_dts, pick back up | ||
# where we left off | ||
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) | ||
if out_local == 1: | ||
# Store the out_tzoffset in seconds | ||
# since we store the total_seconds of | ||
# dateutil.tz.tzoffset objects | ||
tz = timezone(timedelta(minutes=out_tzoffset)) | ||
result_timezone[i] = tz | ||
out_local = 0 | ||
out_tzoffset = 0 | ||
iresult[i] = value | ||
check_dts_bounds(&dts) | ||
continue | ||
if string_to_dts_succeeded: | ||
# No error reported by string_to_dts, pick back up | ||
# where we left off | ||
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) | ||
if out_local == 1: | ||
# Store the out_tzoffset in seconds | ||
# since we store the total_seconds of | ||
# dateutil.tz.tzoffset objects | ||
tz = timezone(timedelta(minutes=out_tzoffset)) | ||
result_timezone[i] = tz | ||
out_local = 0 | ||
out_tzoffset = 0 | ||
iresult[i] = value | ||
check_dts_bounds(&dts) | ||
continue | ||
|
||
if parse_today_now(val, &iresult[i], utc): | ||
continue | ||
|
||
# Some ISO formats can't be parsed by string_to_dts | ||
# For example, 6-digit YYYYMD. So, if there's an error, | ||
# try the string-matching code below. | ||
# For example, 6-digit YYYYMD. So, if there's an error, and a format | ||
# was specified, then try the string-matching code below. If the format | ||
# specified was 'ISO8601', then we need to error, because | ||
# only string_to_dts handles mixed ISO8601 formats. | ||
if not string_to_dts_succeeded and fmt == "ISO8601": | ||
raise ValueError(f"Time data {val} is not ISO8601 format") | ||
|
||
# exact matching | ||
if exact: | ||
found = format_regex.match(val) | ||
if not found: | ||
raise ValueError(f"time data \"{val}\" doesn't " | ||
f"match format \"{fmt}\"") | ||
raise ValueError( | ||
f"time data \"{val}\" doesn't " | ||
f"match {'(inferred) '*fmt_inferred}format \"{fmt}\"" | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
if len(val) != found.end(): | ||
raise ValueError( | ||
f"unconverted data remains: " | ||
"unconverted data remains when parsing with " | ||
f"{'(inferred) '*fmt_inferred}format \"{fmt}\": " | ||
f'"{val[found.end():]}"' | ||
) | ||
|
||
|
@@ -352,7 +366,7 @@ def array_strptime( | |
if not found: | ||
raise ValueError( | ||
f"time data \"{val}\" doesn't match " | ||
f"format \"{fmt}\"" | ||
f"{'(inferred) '*fmt_inferred}format \"{fmt}\"" | ||
) | ||
|
||
iso_year = -1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -442,11 +442,15 @@ def _convert_listlike_datetimes( | |
|
||
arg = ensure_object(arg) | ||
|
||
format_inferred = False | ||
if format is None: | ||
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) | ||
format_inferred = True | ||
|
||
if format is not None: | ||
return _array_strptime_with_fallback(arg, name, utc, format, exact, errors) | ||
return _array_strptime_with_fallback( | ||
arg, name, utc, format, format_inferred, exact, errors | ||
) | ||
|
||
result, tz_parsed = objects_to_datetime64ns( | ||
arg, | ||
|
@@ -471,13 +475,16 @@ def _array_strptime_with_fallback( | |
name, | ||
utc: bool, | ||
fmt: str, | ||
fmt_inferred: bool, | ||
exact: bool, | ||
errors: str, | ||
) -> Index: | ||
""" | ||
Call array_strptime, with fallback behavior depending on 'errors'. | ||
""" | ||
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc) | ||
result, timezones = array_strptime( | ||
arg, fmt, fmt_inferred=fmt_inferred, exact=exact, errors=errors, utc=utc | ||
) | ||
if any(tz is not None for tz in timezones): | ||
return _return_parsed_timezone_results(result, timezones, utc, name) | ||
|
||
|
@@ -759,6 +766,7 @@ def to_datetime( | |
<https://docs.python.org/3/library/datetime.html | ||
#strftime-and-strptime-behavior>`_ for more information on choices, though | ||
note that :const:`"%f"` will parse all the way up to nanoseconds. | ||
You can also pass "ISO8601" to parse any ISO8601 time string. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a link that explains ISO8601? (e.g. https://en.wikipedia.org/wiki/ISO_8601) Although, looking at that link makes me wonder whether we should then be explicit that this covers only the calendar date + time formatting (and not week dates or ordinal dates). |
||
exact : bool, default True | ||
Control how `format` is used: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
- Status: Accepted | ||
- Discussion: [#48621](https://github.com/pandas-dev/pandas/pull/48621) | ||
- Author: [Marco Gorelli](https://github.com/MarcoGorelli) | ||
- Revision: 1 | ||
- Revision: 2 | ||
|
||
## Abstract | ||
|
||
|
@@ -64,6 +64,11 @@ Out[3]: | |
1 2000-01-13 | ||
dtype: datetime64[ns] | ||
``` | ||
or, if their dates are all ISO8601, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can update the one above to use [inline code lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I missed this one - thanks! |
||
```ipython | ||
In [4]: pd.to_datetime(['2020-01-01', '2020-01-01 03:00'], format='ISO8601') | ||
Out[4]: DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 03:00:00'], dtype='datetime64[ns]', freq=None) | ||
``` | ||
|
||
## Usage and Impact | ||
|
||
|
@@ -99,3 +104,4 @@ We could make ``guess_datetime_format`` smarter by using a random sample of elem | |
### PDEP History | ||
|
||
- 18 September 2022: Initial draft | ||
- 23 January 2023: Amended to mention ``format='ISO8601'`` option |
Uh oh!
There was an error while loading. Please reload this page.