Skip to content

Commit 2008eb3

Browse files
author
MarcoGorelli
committed
fixup
1 parent 82f3252 commit 2008eb3

File tree

4 files changed

+122
-104
lines changed

4 files changed

+122
-104
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ Other enhancements
6464
- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`)
6565
- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`)
6666
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
67-
- :func:`to_datetime` now skips ``datetime.datetime`` and :class:`Timestamp` objects when passing ``format`` argument instead of raising a ``ValueError``. (:issue:`49298`)
67+
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
68+
-
6869

6970
.. ---------------------------------------------------------------------------
7071
.. _whatsnew_200.notable_bug_fixes:
@@ -631,6 +632,7 @@ Datetimelike
631632
- Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`)
632633
- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
633634
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
635+
- Bug in :func:`to_datetime` raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with a non-ISO8601 ``format`` (:issue:`49298`)
634636
-
635637

636638
Timedelta

pandas/_libs/tslibs/strptime.pyx

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ from numpy cimport (
2020
)
2121

2222
from pandas._libs.missing cimport checknull_with_nat_and_na
23+
from pandas._libs.tslibs.conversion cimport convert_timezone
2324
from pandas._libs.tslibs.nattype cimport (
2425
NPY_NAT,
2526
c_nat_strings as nat_strings,
@@ -59,7 +60,13 @@ cdef dict _parse_code_table = {'y': 0,
5960
'u': 22}
6061

6162

62-
def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='raise'):
63+
def array_strptime(
64+
ndarray[object] values,
65+
str fmt,
66+
bint exact=True,
67+
errors='raise',
68+
bint utc=False,
69+
):
6370
"""
6471
Calculates the datetime structs represented by the passed array of strings
6572
@@ -84,6 +91,9 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
8491
bint is_raise = errors=='raise'
8592
bint is_ignore = errors=='ignore'
8693
bint is_coerce = errors=='coerce'
94+
bint found_naive = False
95+
bint found_tz = False
96+
tzinfo tz_out = None
8797

8898
assert is_raise or is_ignore or is_coerce
8999

@@ -127,7 +137,6 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
127137
result_timezone = np.empty(n, dtype='object')
128138

129139
dts.us = dts.ps = dts.as = 0
130-
expect_tz_aware = "%z" in fmt or "%Z" in fmt
131140

132141
for i in range(n):
133142
val = values[i]
@@ -139,15 +148,22 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
139148
iresult[i] = NPY_NAT
140149
continue
141150
elif PyDateTime_Check(val):
151+
if val.tzinfo is not None:
152+
found_tz = True
153+
else:
154+
found_naive = True
155+
tz_out = convert_timezone(
156+
val.tzinfo,
157+
tz_out,
158+
found_naive,
159+
found_tz,
160+
utc,
161+
)
142162
if isinstance(val, _Timestamp):
143-
iresult[i] = val.tz_localize(None)._as_unit("ns").value
163+
iresult[i] = val.tz_localize(None).as_unit("ns").value
144164
else:
145-
iresult[i] = pydatetime_to_dt64(val, &dts)
165+
iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)
146166
check_dts_bounds(&dts)
147-
if val.tzinfo is None and expect_tz_aware:
148-
raise ValueError("Cannot mix tz-aware with tz-naive values")
149-
elif val.tzinfo is not None and not expect_tz_aware:
150-
raise ValueError("Cannot mix tz-aware with tz-naive values")
151167
result_timezone[i] = val.tzinfo
152168
continue
153169
else:

pandas/core/tools/datetimes.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,14 @@ def _return_parsed_timezone_results(
318318
)
319319
if utc:
320320
# Convert to the same tz
321-
tz_results = np.array([tz_result.tz_convert("utc") for tz_result in tz_results])
321+
tz_results = np.array(
322+
[
323+
tz_result.tz_convert("utc")
324+
if tz_result.tzinfo is not None
325+
else tz_result.tz_localize("utc")
326+
for tz_result in tz_results
327+
]
328+
)
322329

323330
return Index(tz_results, name=name)
324331

@@ -468,7 +475,9 @@ def _array_strptime_with_fallback(
468475
Call array_strptime, with fallback behavior depending on 'errors'.
469476
"""
470477
try:
471-
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors)
478+
result, timezones = array_strptime(
479+
arg, fmt, exact=exact, errors=errors, utc=utc
480+
)
472481
except OutOfBoundsDatetime:
473482
if errors == "raise":
474483
raise
@@ -495,7 +504,7 @@ def _array_strptime_with_fallback(
495504
# Indicates to the caller to fallback to objects_to_datetime64ns
496505
return None
497506
else:
498-
if any(timezones):
507+
if any([i is not None for i in timezones]):
499508
return _return_parsed_timezone_results(result, timezones, utc, name)
500509

501510
return _box_as_indexlike(result, utc=utc, name=name)

pandas/tests/tools/test_to_datetime.py

Lines changed: 83 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,89 @@ def test_to_datetime_mixed_datetime_and_string(self):
469469
expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
470470
tm.assert_index_equal(res, expected)
471471

472+
@pytest.mark.parametrize(
473+
"fmt",
474+
["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
475+
ids=["non-ISO8601 format", "ISO8601 format"],
476+
)
477+
@pytest.mark.parametrize(
478+
"utc, input, expected",
479+
[
480+
pytest.param(
481+
True,
482+
["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
483+
DatetimeIndex(
484+
["2000-01-01 09:00:00+00:00", "2000-01-01 10:00:00+00:00"],
485+
dtype="datetime64[ns, UTC]",
486+
),
487+
id="all tz-aware, with utc",
488+
),
489+
pytest.param(
490+
False,
491+
["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
492+
DatetimeIndex(
493+
["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
494+
tz=pytz.FixedOffset(-480),
495+
),
496+
id="all tz-aware, without utc",
497+
),
498+
pytest.param(
499+
True,
500+
["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00+00:00"],
501+
DatetimeIndex(
502+
["2000-01-01 09:00:00+00:00", "2000-01-01 02:00:00+00:00"],
503+
dtype="datetime64[ns, UTC]",
504+
),
505+
id="all tz-aware, mixed offsets, with utc",
506+
),
507+
],
508+
)
509+
@pytest.mark.parametrize(
510+
"constructor",
511+
[Timestamp, lambda x: Timestamp(x).to_pydatetime()],
512+
)
513+
def test_to_datetime_mixed_datetime_and_string_with_format(
514+
self, fmt, utc, input, expected, constructor
515+
):
516+
# https://github.com/pandas-dev/pandas/issues/49298
517+
# note: ISO8601 formats go down a fastpath, so we need to check both
518+
# an ISO8601 format and a non-ISO8601 one
519+
ts1 = constructor(input[0])
520+
ts2 = input[1]
521+
result = to_datetime([ts1, ts2], format=fmt, utc=utc)
522+
tm.assert_index_equal(result, expected)
523+
524+
@pytest.mark.parametrize(
525+
"fmt",
526+
["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
527+
ids=["non-ISO8601 format", "ISO8601 format"],
528+
)
529+
@pytest.mark.parametrize(
530+
"input",
531+
[
532+
pytest.param(
533+
["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-07:00"],
534+
id="all tz-aware, mixed timezones, without utc",
535+
),
536+
],
537+
)
538+
@pytest.mark.parametrize(
539+
"constructor",
540+
[Timestamp, lambda x: Timestamp(x).to_pydatetime()],
541+
)
542+
def test_to_datetime_mixed_datetime_and_string_with_format_raises(
543+
self, fmt, input, constructor
544+
):
545+
# https://github.com/pandas-dev/pandas/issues/49298
546+
# note: ISO8601 formats go down a fastpath, so we need to check both
547+
# an ISO8601 format and a non-ISO8601 one
548+
ts1 = constructor(input[0])
549+
ts2 = constructor(input[1])
550+
with pytest.raises(
551+
ValueError, match="cannot be converted to datetime64 unless utc=True"
552+
):
553+
to_datetime([ts1, ts2], format=fmt, utc=False)
554+
472555
@pytest.mark.parametrize("infer_datetime_format", [True, False])
473556
def test_to_datetime_np_str(self, infer_datetime_format):
474557
# GH#32264
@@ -609,98 +692,6 @@ def test_to_datetime_dtarr(self, tz):
609692
result = to_datetime(arr)
610693
assert result is arr
611694

612-
def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(self):
613-
# GH 49298
614-
# Test explicit custom format
615-
case1 = [
616-
Timestamp("2001-10-01 12:00:01.123456789"),
617-
datetime(2001, 10, 2, 12, 30, 1, 123456),
618-
"10/03/01",
619-
]
620-
result = to_datetime(case1, format="%m/%d/%y")
621-
expected_data = [
622-
Timestamp("2001-10-01 12:00:01.123456789"),
623-
Timestamp("2001-10-02 12:30:01.123456"),
624-
Timestamp("2001-10-03 00:00:00"),
625-
]
626-
tm.assert_equal(result, DatetimeIndex(expected_data))
627-
628-
# Test ISO8601 format
629-
case2 = [
630-
Timestamp("2001-10-01 13:18:05"),
631-
datetime(2001, 10, 2, 13, 18, 5),
632-
"2001-10-03T13:18:05",
633-
"20011004",
634-
]
635-
result = to_datetime(case2)
636-
expected_data = [
637-
Timestamp("2001-10-01 13:18:05"),
638-
Timestamp("2001-10-02 13:18:05"),
639-
Timestamp("2001-10-03 13:18:05"),
640-
Timestamp("2001-10-04 00:00:00"),
641-
]
642-
tm.assert_equal(result, DatetimeIndex(expected_data))
643-
644-
def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_with_tz(self):
645-
# GH 49298
646-
# Different offsets when utc=True
647-
data = [
648-
"20100102 121314 +01:00",
649-
"20100102 121315 -05:00",
650-
pytz.timezone("Europe/Berlin").localize(datetime(2010, 1, 2, 12, 13, 16)),
651-
pytz.timezone("US/Eastern").localize(Timestamp("2010-01-02 12:13:17")),
652-
]
653-
expected_data = [
654-
Timestamp("2010-01-02 11:13:14", tz="utc"),
655-
Timestamp("2010-01-02 17:13:15", tz="utc"),
656-
Timestamp("2010-01-02 11:13:16", tz="utc"),
657-
Timestamp("2010-01-02 17:13:17", tz="utc"),
658-
]
659-
result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=True)
660-
tm.assert_equal(result, DatetimeIndex(expected_data))
661-
662-
# Different offsets when utc=False
663-
expected_data = [
664-
Timestamp("2010-01-02 12:13:14 +01:00"),
665-
Timestamp("2010-01-02 12:13:15 -05:00"),
666-
Timestamp("2010-01-02 12:13:16 +01:00"),
667-
Timestamp("2010-01-02 12:13:17 -05:00"),
668-
]
669-
result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=False)
670-
tm.assert_equal(result, Index(expected_data))
671-
672-
@pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")])
673-
def test_to_datetime_includes_tz_dtype_on_pydatetime_and_timestamp(self, value):
674-
# GH 49298
675-
# No timezone
676-
result_no_format = to_datetime([value])
677-
result_with_format = to_datetime([value], format="%m-%d-%Y")
678-
tm.assert_equal(result_no_format, result_with_format)
679-
680-
# Localized value
681-
america_santiago = pytz.timezone("America/Santiago")
682-
result_no_format = to_datetime([america_santiago.localize(value)])
683-
result_with_format = to_datetime([america_santiago.localize(value)], format="%m-%d-%Y %z")
684-
tm.assert_equal(result_with_format.dtype.tz, america_santiago)
685-
tm.assert_equal(result_no_format, result_with_format)
686-
687-
@pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")])
688-
def test_to_datetime_mixing_naive_tzaware_raises(self, value):
689-
# GH 49298
690-
msg = "Cannot mix tz-aware with tz-naive values"
691-
america_santiago = pytz.timezone("America/Santiago")
692-
# Fail if format expects tz but input is not localized
693-
with pytest.raises(ValueError, match=msg):
694-
to_datetime([value], format="%m-%d-%Y %z")
695-
# Fail if format does not expect tz but input is localized
696-
with pytest.raises(ValueError, match=msg):
697-
to_datetime([america_santiago.localize(value)], format="%m-%d-%Y")
698-
# Mixed input should fail in both cases
699-
with pytest.raises(ValueError, match=msg):
700-
to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y %z")
701-
with pytest.raises(ValueError, match=msg):
702-
to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y")
703-
704695
def test_to_datetime_pydatetime(self):
705696
actual = to_datetime(datetime(2008, 1, 15))
706697
assert actual == datetime(2008, 1, 15)

0 commit comments

Comments
 (0)