From 82f32526c33adba07c6be18fed785c3cd3c8e62f Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Wed, 26 Oct 2022 13:16:33 -0300 Subject: [PATCH 01/10] parent 0168e27843efb96b56202f22fbe5dd720c346368 author Antonio Ossa Guerra 1666800993 -0300 committer MarcoGorelli <> 1669293453 +0000 Parse `datetime` properly in `pd.to_datetime` When applying `pd.to_datetime` on an array-like structure that contains a `datetime.datetime` object, while using the `format` argument, a `ValueError` is raised because the `datetime.datetime` object does not match the expected format. The implemented solution looks for `datetime.datetime` instances in the `array_strptime` method. If an instance of this type is found, it's properly handled by the new `_parse_python_datetime_object`, which returns the expected NumPy datetime object. Signed-off-by: Antonio Ossa Guerra --- doc/source/whatsnew/v2.0.0.rst | 3 +- pandas/_libs/tslibs/strptime.pyx | 28 ++++++-- pandas/core/tools/datetimes.py | 2 +- pandas/tests/tools/test_to_datetime.py | 92 ++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index fd7cc0598f850..d7ac82c571b3a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -64,8 +64,7 @@ Other enhancements - :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) - :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. 
(:issue:`35849`) -- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) -- +- :func:`to_datetime` now skips ``datetime.datetime`` and :class:`Timestamp` objects when passing ``format`` argument instead of raising a ``ValueError``. (:issue:`49298`) .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index f540ad19c48d2..124ba1bb0b595 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,10 +1,14 @@ """Strptime-related classes and functions. """ from cpython.datetime cimport ( + PyDateTime_Check, date, + import_datetime, tzinfo, ) +import_datetime() + from _thread import allocate_lock as _thread_allocate_lock import numpy as np @@ -25,7 +29,9 @@ from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, npy_datetimestruct, npy_datetimestruct_to_datetime, + pydatetime_to_dt64, ) +from pandas._libs.tslibs.timestamps cimport _Timestamp cdef dict _parse_code_table = {'y': 0, @@ -121,6 +127,7 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai result_timezone = np.empty(n, dtype='object') dts.us = dts.ps = dts.as = 0 + expect_tz_aware = "%z" in fmt or "%Z" in fmt for i in range(n): val = values[i] @@ -128,12 +135,23 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai if val in nat_strings: iresult[i] = NPY_NAT continue - else: - if checknull_with_nat_and_na(val): - iresult[i] = NPY_NAT - continue + elif checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + continue + elif PyDateTime_Check(val): + if isinstance(val, _Timestamp): + iresult[i] = val.tz_localize(None)._as_unit("ns").value else: - val = str(val) + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + if val.tzinfo is None and expect_tz_aware: + 
raise ValueError("Cannot mix tz-aware with tz-naive values") + elif val.tzinfo is not None and not expect_tz_aware: + raise ValueError("Cannot mix tz-aware with tz-naive values") + result_timezone[i] = val.tzinfo + continue + else: + val = str(val) # exact matching if exact: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 97d054df8287f..9283ed486fca6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -495,7 +495,7 @@ def _array_strptime_with_fallback( # Indicates to the caller to fallback to objects_to_datetime64ns return None else: - if "%Z" in fmt or "%z" in fmt: + if any(timezones): return _return_parsed_timezone_results(result, timezones, utc, name) return _box_as_indexlike(result, utc=utc, name=name) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4c70aeb3e36aa..02842f9d2494c 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -609,6 +609,98 @@ def test_to_datetime_dtarr(self, tz): result = to_datetime(arr) assert result is arr + def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(self): + # GH 49298 + # Test explicit custom format + case1 = [ + Timestamp("2001-10-01 12:00:01.123456789"), + datetime(2001, 10, 2, 12, 30, 1, 123456), + "10/03/01", + ] + result = to_datetime(case1, format="%m/%d/%y") + expected_data = [ + Timestamp("2001-10-01 12:00:01.123456789"), + Timestamp("2001-10-02 12:30:01.123456"), + Timestamp("2001-10-03 00:00:00"), + ] + tm.assert_equal(result, DatetimeIndex(expected_data)) + + # Test ISO8601 format + case2 = [ + Timestamp("2001-10-01 13:18:05"), + datetime(2001, 10, 2, 13, 18, 5), + "2001-10-03T13:18:05", + "20011004", + ] + result = to_datetime(case2) + expected_data = [ + Timestamp("2001-10-01 13:18:05"), + Timestamp("2001-10-02 13:18:05"), + Timestamp("2001-10-03 13:18:05"), + Timestamp("2001-10-04 00:00:00"), + ] + tm.assert_equal(result, 
DatetimeIndex(expected_data)) + + def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_with_tz(self): + # GH 49298 + # Different offsets when utc=True + data = [ + "20100102 121314 +01:00", + "20100102 121315 -05:00", + pytz.timezone("Europe/Berlin").localize(datetime(2010, 1, 2, 12, 13, 16)), + pytz.timezone("US/Eastern").localize(Timestamp("2010-01-02 12:13:17")), + ] + expected_data = [ + Timestamp("2010-01-02 11:13:14", tz="utc"), + Timestamp("2010-01-02 17:13:15", tz="utc"), + Timestamp("2010-01-02 11:13:16", tz="utc"), + Timestamp("2010-01-02 17:13:17", tz="utc"), + ] + result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=True) + tm.assert_equal(result, DatetimeIndex(expected_data)) + + # Different offsets when utc=False + expected_data = [ + Timestamp("2010-01-02 12:13:14 +01:00"), + Timestamp("2010-01-02 12:13:15 -05:00"), + Timestamp("2010-01-02 12:13:16 +01:00"), + Timestamp("2010-01-02 12:13:17 -05:00"), + ] + result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=False) + tm.assert_equal(result, Index(expected_data)) + + @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) + def test_to_datetime_includes_tz_dtype_on_pydatetime_and_timestamp(self, value): + # GH 49298 + # No timezone + result_no_format = to_datetime([value]) + result_with_format = to_datetime([value], format="%m-%d-%Y") + tm.assert_equal(result_no_format, result_with_format) + + # Localized value + america_santiago = pytz.timezone("America/Santiago") + result_no_format = to_datetime([america_santiago.localize(value)]) + result_with_format = to_datetime([america_santiago.localize(value)], format="%m-%d-%Y %z") + tm.assert_equal(result_with_format.dtype.tz, america_santiago) + tm.assert_equal(result_no_format, result_with_format) + + @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) + def test_to_datetime_mixing_naive_tzaware_raises(self, value): + # GH 49298 + msg = 
"Cannot mix tz-aware with tz-naive values" + america_santiago = pytz.timezone("America/Santiago") + # Fail if format expects tz but input is not localized + with pytest.raises(ValueError, match=msg): + to_datetime([value], format="%m-%d-%Y %z") + # Fail if format does not expect tz but input is localized + with pytest.raises(ValueError, match=msg): + to_datetime([america_santiago.localize(value)], format="%m-%d-%Y") + # Mixed input should fail in both cases + with pytest.raises(ValueError, match=msg): + to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y %z") + with pytest.raises(ValueError, match=msg): + to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y") + def test_to_datetime_pydatetime(self): actual = to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15) From 2008eb34d08bf486dc99b5077e1a17bf3352ae41 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 24 Nov 2022 12:35:52 +0000 Subject: [PATCH 02/10] fixup --- doc/source/whatsnew/v2.0.0.rst | 4 +- pandas/_libs/tslibs/strptime.pyx | 32 +++-- pandas/core/tools/datetimes.py | 15 ++- pandas/tests/tools/test_to_datetime.py | 175 ++++++++++++------------- 4 files changed, 122 insertions(+), 104 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d7ac82c571b3a..050cd6a341aeb 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -64,7 +64,8 @@ Other enhancements - :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) - :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. 
Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) -- :func:`to_datetime` now skips ``datetime.datetime`` and :class:`Timestamp` objects when passing ``format`` argument instead of raising a ``ValueError``. (:issue:`49298`) +- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) +- .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: @@ -631,6 +632,7 @@ Datetimelike - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) +- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with non-ISO8601 ``format`` (:issue:`49298`) - Timedelta diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 124ba1bb0b595..79944bc86a8cf 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -20,6 +20,7 @@ from numpy cimport ( ) from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.conversion cimport convert_timezone from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_nat_strings as nat_strings, @@ -59,7 +60,13 @@ cdef dict _parse_code_table = {'y': 0, 'u': 22} -def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='raise'): +def array_strptime( + ndarray[object] values, + str fmt, + bint exact=True, + 
errors='raise', + bint utc=False, +): """ Calculates the datetime structs represented by the passed array of strings @@ -84,6 +91,9 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' + bint found_naive = False + bint found_tz = False + tzinfo tz_out = None assert is_raise or is_ignore or is_coerce @@ -127,7 +137,6 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai result_timezone = np.empty(n, dtype='object') dts.us = dts.ps = dts.as = 0 - expect_tz_aware = "%z" in fmt or "%Z" in fmt for i in range(n): val = values[i] @@ -139,15 +148,22 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai iresult[i] = NPY_NAT continue elif PyDateTime_Check(val): + if val.tzinfo is not None: + found_tz = True + else: + found_naive = True + tz_out = convert_timezone( + val.tzinfo, + tz_out, + found_naive, + found_tz, + utc, + ) if isinstance(val, _Timestamp): - iresult[i] = val.tz_localize(None)._as_unit("ns").value + iresult[i] = val.tz_localize(None).as_unit("ns").value else: - iresult[i] = pydatetime_to_dt64(val, &dts) + iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts) check_dts_bounds(&dts) - if val.tzinfo is None and expect_tz_aware: - raise ValueError("Cannot mix tz-aware with tz-naive values") - elif val.tzinfo is not None and not expect_tz_aware: - raise ValueError("Cannot mix tz-aware with tz-naive values") result_timezone[i] = val.tzinfo continue else: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9283ed486fca6..0b703f82ff83c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -318,7 +318,14 @@ def _return_parsed_timezone_results( ) if utc: # Convert to the same tz - tz_results = np.array([tz_result.tz_convert("utc") for tz_result in tz_results]) + tz_results = np.array( + [ + 
tz_result.tz_convert("utc") + if tz_result.tzinfo is not None + else tz_result.tz_localize("utc") + for tz_result in tz_results + ] + ) return Index(tz_results, name=name) @@ -468,7 +475,9 @@ def _array_strptime_with_fallback( Call array_strptime, with fallback behavior depending on 'errors'. """ try: - result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors) + result, timezones = array_strptime( + arg, fmt, exact=exact, errors=errors, utc=utc + ) except OutOfBoundsDatetime: if errors == "raise": raise @@ -495,7 +504,7 @@ def _array_strptime_with_fallback( # Indicates to the caller to fallback to objects_to_datetime64ns return None else: - if any(timezones): + if any([i is not None for i in timezones]): return _return_parsed_timezone_results(result, timezones, utc, name) return _box_as_indexlike(result, utc=utc, name=name) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 02842f9d2494c..4ef0157e8012f 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -469,6 +469,89 @@ def test_to_datetime_mixed_datetime_and_string(self): expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60)) tm.assert_index_equal(res, expected) + @pytest.mark.parametrize( + "fmt", + ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"], + ids=["non-ISO8601 format", "ISO8601 format"], + ) + @pytest.mark.parametrize( + "utc, input, expected", + [ + pytest.param( + True, + ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"], + DatetimeIndex( + ["2000-01-01 09:00:00+00:00", "2000-01-01 10:00:00+00:00"], + dtype="datetime64[ns, UTC]", + ), + id="all tz-aware, with utc", + ), + pytest.param( + False, + ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"], + DatetimeIndex( + ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"], + tz=pytz.FixedOffset(-480), + ), + id="all tz-aware, without utc", + ), + pytest.param( + True, + ["2000-01-01 01:00:00-08:00", "2000-01-01 
02:00:00+00:00"], + DatetimeIndex( + ["2000-01-01 09:00:00+00:00", "2000-01-01 02:00:00+00:00"], + dtype="datetime64[ns, UTC]", + ), + id="all tz-aware, mixed offsets, with utc", + ), + ], + ) + @pytest.mark.parametrize( + "constructor", + [Timestamp, lambda x: Timestamp(x).to_pydatetime()], + ) + def test_to_datetime_mixed_datetime_and_string_with_format( + self, fmt, utc, input, expected, constructor + ): + # https://github.com/pandas-dev/pandas/issues/49298 + # note: ISO8601 formats go down a fastpath, so we need to check both + # a ISO8601 format and a non-ISO8601 one + ts1 = constructor(input[0]) + ts2 = input[1] + result = to_datetime([ts1, ts2], format=fmt, utc=utc) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "fmt", + ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"], + ids=["non-ISO8601 format", "ISO8601 format"], + ) + @pytest.mark.parametrize( + "input", + [ + pytest.param( + ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-07:00"], + id="all tz-aware, mixed timezones, without utc", + ), + ], + ) + @pytest.mark.parametrize( + "constructor", + [Timestamp, lambda x: Timestamp(x).to_pydatetime()], + ) + def test_to_datetime_mixed_datetime_and_string_with_format_raises( + self, fmt, input, constructor + ): + # https://github.com/pandas-dev/pandas/issues/49298 + # note: ISO8601 formats go down a fastpath, so we need to check both + # a ISO8601 format and a non-ISO8601 one + ts1 = constructor(input[0]) + ts2 = constructor(input[1]) + with pytest.raises( + ValueError, match="cannot be converted to datetime64 unless utc=True" + ): + to_datetime([ts1, ts2], format=fmt, utc=False) + @pytest.mark.parametrize("infer_datetime_format", [True, False]) def test_to_datetime_np_str(self, infer_datetime_format): # GH#32264 @@ -609,98 +692,6 @@ def test_to_datetime_dtarr(self, tz): result = to_datetime(arr) assert result is arr - def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(self): - # GH 49298 - # Test explicit custom format - 
case1 = [ - Timestamp("2001-10-01 12:00:01.123456789"), - datetime(2001, 10, 2, 12, 30, 1, 123456), - "10/03/01", - ] - result = to_datetime(case1, format="%m/%d/%y") - expected_data = [ - Timestamp("2001-10-01 12:00:01.123456789"), - Timestamp("2001-10-02 12:30:01.123456"), - Timestamp("2001-10-03 00:00:00"), - ] - tm.assert_equal(result, DatetimeIndex(expected_data)) - - # Test ISO8601 format - case2 = [ - Timestamp("2001-10-01 13:18:05"), - datetime(2001, 10, 2, 13, 18, 5), - "2001-10-03T13:18:05", - "20011004", - ] - result = to_datetime(case2) - expected_data = [ - Timestamp("2001-10-01 13:18:05"), - Timestamp("2001-10-02 13:18:05"), - Timestamp("2001-10-03 13:18:05"), - Timestamp("2001-10-04 00:00:00"), - ] - tm.assert_equal(result, DatetimeIndex(expected_data)) - - def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_with_tz(self): - # GH 49298 - # Different offsets when utc=True - data = [ - "20100102 121314 +01:00", - "20100102 121315 -05:00", - pytz.timezone("Europe/Berlin").localize(datetime(2010, 1, 2, 12, 13, 16)), - pytz.timezone("US/Eastern").localize(Timestamp("2010-01-02 12:13:17")), - ] - expected_data = [ - Timestamp("2010-01-02 11:13:14", tz="utc"), - Timestamp("2010-01-02 17:13:15", tz="utc"), - Timestamp("2010-01-02 11:13:16", tz="utc"), - Timestamp("2010-01-02 17:13:17", tz="utc"), - ] - result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=True) - tm.assert_equal(result, DatetimeIndex(expected_data)) - - # Different offsets when utc=False - expected_data = [ - Timestamp("2010-01-02 12:13:14 +01:00"), - Timestamp("2010-01-02 12:13:15 -05:00"), - Timestamp("2010-01-02 12:13:16 +01:00"), - Timestamp("2010-01-02 12:13:17 -05:00"), - ] - result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=False) - tm.assert_equal(result, Index(expected_data)) - - @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) - def test_to_datetime_includes_tz_dtype_on_pydatetime_and_timestamp(self, 
value): - # GH 49298 - # No timezone - result_no_format = to_datetime([value]) - result_with_format = to_datetime([value], format="%m-%d-%Y") - tm.assert_equal(result_no_format, result_with_format) - - # Localized value - america_santiago = pytz.timezone("America/Santiago") - result_no_format = to_datetime([america_santiago.localize(value)]) - result_with_format = to_datetime([america_santiago.localize(value)], format="%m-%d-%Y %z") - tm.assert_equal(result_with_format.dtype.tz, america_santiago) - tm.assert_equal(result_no_format, result_with_format) - - @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) - def test_to_datetime_mixing_naive_tzaware_raises(self, value): - # GH 49298 - msg = "Cannot mix tz-aware with tz-naive values" - america_santiago = pytz.timezone("America/Santiago") - # Fail if format expects tz but input is not localized - with pytest.raises(ValueError, match=msg): - to_datetime([value], format="%m-%d-%Y %z") - # Fail if format does not expect tz but input is localized - with pytest.raises(ValueError, match=msg): - to_datetime([america_santiago.localize(value)], format="%m-%d-%Y") - # Mixed input should fail in both cases - with pytest.raises(ValueError, match=msg): - to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y %z") - with pytest.raises(ValueError, match=msg): - to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y") - def test_to_datetime_pydatetime(self): actual = to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15) From 560a02a9fe1b0e1f67f55bcfa457e7b76419092e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 24 Nov 2022 13:43:15 +0000 Subject: [PATCH 03/10] :label: typing --- pandas/_libs/tslibs/strptime.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/strptime.pyi b/pandas/_libs/tslibs/strptime.pyi index 8e1acb2ff0d38..4565bb7ecf959 100644 --- a/pandas/_libs/tslibs/strptime.pyi +++ 
b/pandas/_libs/tslibs/strptime.pyi @@ -7,6 +7,7 @@ def array_strptime( fmt: str | None, exact: bool = ..., errors: str = ..., + utc: bool = ..., ) -> tuple[np.ndarray, np.ndarray]: ... # first ndarray is M8[ns], second is object ndarray of tzinfo | None From f38a7cd3566a332474ce0f35a2fdd14d20ee49b7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 24 Nov 2022 15:05:17 +0000 Subject: [PATCH 04/10] ignore pylint nitpick --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7b4c9425d557c..b73a4171a3175 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,7 @@ disable = [ "unsupported-assignment-operation", "unsupported-membership-test", "unused-import", + "use-a-generator", "use-implicit-booleaness-not-comparison", "use-implicit-booleaness-not-len", "wrong-import-order", From 5884f4ebb6f525951e8c3ff4ee23a5b5c088b053 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 24 Nov 2022 15:06:42 +0000 Subject: [PATCH 05/10] better naming --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0b703f82ff83c..430343beb630b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -504,7 +504,7 @@ def _array_strptime_with_fallback( # Indicates to the caller to fallback to objects_to_datetime64ns return None else: - if any([i is not None for i in timezones]): + if any(tz is not None for tz in timezones): return _return_parsed_timezone_results(result, timezones, utc, name) return _box_as_indexlike(result, utc=utc, name=name) From e665ada6d2ba7b24e9bacb0485f4b2752569d394 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 28 Nov 2022 19:19:11 +0000 Subject: [PATCH 06/10] keep use-a-generator check --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9137e56c26cd5..ad180ff5d59d5 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -101,7 +101,6 @@ disable = [ "unsupported-assignment-operation", "unsupported-membership-test", "unused-import", - "use-a-generator", "use-implicit-booleaness-not-comparison", "use-implicit-booleaness-not-len", "wrong-import-order", From af272e12ba69a7d0c782b75636d090343e0d7cde Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 28 Nov 2022 19:25:15 +0000 Subject: [PATCH 07/10] use fromisoformat --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4ef0157e8012f..53e7798473cc8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -508,7 +508,7 @@ def test_to_datetime_mixed_datetime_and_string(self): ) @pytest.mark.parametrize( "constructor", - [Timestamp, lambda x: Timestamp(x).to_pydatetime()], + [Timestamp, datetime.fromisoformat], ) def test_to_datetime_mixed_datetime_and_string_with_format( self, fmt, utc, input, expected, constructor From 12f5ac7f4b80a266f66ee43652cafaf3d9a9b1ba Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 28 Nov 2022 20:11:29 +0000 Subject: [PATCH 08/10] change awareness to be UTC --- pandas/tests/tools/test_to_datetime.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 53e7798473cc8..ce1babda841fc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -488,10 +488,9 @@ def test_to_datetime_mixed_datetime_and_string(self): ), pytest.param( False, - ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"], + ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"], DatetimeIndex( - ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"], - tz=pytz.FixedOffset(-480), + ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"], ), id="all tz-aware, without utc", ), 
From 06f1a53947f70b3fe74a4031ffdd2a15ff69857d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 28 Nov 2022 20:16:19 +0000 Subject: [PATCH 09/10] Revert "use fromisoformat" This reverts commit af272e12ba69a7d0c782b75636d090343e0d7cde. --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ce1babda841fc..a5e5bcbb971ea 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -507,7 +507,7 @@ def test_to_datetime_mixed_datetime_and_string(self): ) @pytest.mark.parametrize( "constructor", - [Timestamp, datetime.fromisoformat], + [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) def test_to_datetime_mixed_datetime_and_string_with_format( self, fmt, utc, input, expected, constructor From ec27d427018f13789e1b776f662ff1a74fa1c8d1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 30 Nov 2022 15:52:35 +0000 Subject: [PATCH 10/10] rename input to args --- pandas/tests/tools/test_to_datetime.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a5e5bcbb971ea..ec4fbfeee2c3d 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -475,7 +475,7 @@ def test_to_datetime_mixed_datetime_and_string(self): ids=["non-ISO8601 format", "ISO8601 format"], ) @pytest.mark.parametrize( - "utc, input, expected", + "utc, args, expected", [ pytest.param( True, @@ -510,13 +510,13 @@ def test_to_datetime_mixed_datetime_and_string(self): [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) def test_to_datetime_mixed_datetime_and_string_with_format( - self, fmt, utc, input, expected, constructor + self, fmt, utc, args, expected, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 # note: ISO8601 formats go down a fastpath, 
so we need to check both # a ISO8601 format and a non-ISO8601 one - ts1 = constructor(input[0]) - ts2 = input[1] + ts1 = constructor(args[0]) + ts2 = args[1] result = to_datetime([ts1, ts2], format=fmt, utc=utc) tm.assert_index_equal(result, expected) @@ -526,7 +526,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ids=["non-ISO8601 format", "ISO8601 format"], ) @pytest.mark.parametrize( - "input", + "args", [ pytest.param( ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-07:00"], @@ -539,13 +539,13 @@ def test_to_datetime_mixed_datetime_and_string_with_format( [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) def test_to_datetime_mixed_datetime_and_string_with_format_raises( - self, fmt, input, constructor + self, fmt, args, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 # note: ISO8601 formats go down a fastpath, so we need to check both # a ISO8601 format and a non-ISO8601 one - ts1 = constructor(input[0]) - ts2 = constructor(input[1]) + ts1 = constructor(args[0]) + ts2 = constructor(args[1]) with pytest.raises( ValueError, match="cannot be converted to datetime64 unless utc=True" ):