diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 10bcf6c9eabbf..03450c53781c7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -770,6 +770,15 @@ cdef _array_to_datetime_object( oresult[i] = "NaT" cnp.PyArray_MultiIter_NEXT(mi) continue + elif val == "now": + oresult[i] = datetime.now() + cnp.PyArray_MultiIter_NEXT(mi) + continue + elif val == "today": + oresult[i] = datetime.today() + cnp.PyArray_MultiIter_NEXT(mi) + continue + try: oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index a4440ffff5be9..c5d53f77762f9 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -2,7 +2,6 @@ from datetime import datetime import numpy as np -from pandas._libs.tslibs.offsets import BaseOffset from pandas._typing import npt class DateParseError(ValueError): ... @@ -12,9 +11,9 @@ def parse_datetime_string( dayfirst: bool = ..., yearfirst: bool = ..., ) -> datetime: ... -def parse_time_string( - arg: str, - freq: BaseOffset | str | None = ..., +def parse_datetime_string_with_reso( + date_string: str, + freq: str | None = ..., dayfirst: bool | None = ..., yearfirst: bool | None = ..., ) -> tuple[datetime, str]: ... diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index dabeab3e30f4d..36794d9a539de 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -59,7 +59,6 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, string_to_dts, ) -from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.util cimport ( get_c_string_buf_and_size, @@ -257,6 +256,10 @@ def parse_datetime_string( Returns ------- datetime + + Notes + ----- + Does not handle "today" or "now", which caller is responsible for handling. """ cdef: @@ -275,14 +278,6 @@ def parse_datetime_string( if dt is not None: return dt - # Handling special case strings today & now - if date_string == "now": - dt = datetime.now() - return dt - elif date_string == "today": - dt = datetime.today() - return dt - try: dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None) return dt @@ -308,16 +303,22 @@ def parse_datetime_string( return dt -def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): +def parse_datetime_string_with_reso( + str date_string, str freq=None, dayfirst=None, yearfirst=None +): + # NB: This will break with np.str_ (GH#45580) even though + # isinstance(npstrobj, str) evaluates to True, so caller must ensure + # the argument is *exactly* 'str' """ Try hard to parse datetime string, leveraging dateutil plus some extra goodies like quarter recognition. Parameters ---------- - arg : str - freq : str or DateOffset, default None + date_string : str + freq : str or None, default None Helps with interpreting time string if supplied + Corresponds to `offset.rule_code` dayfirst : bool, default None If None uses default from print_config yearfirst : bool, default None @@ -328,50 +329,21 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): datetime str Describing resolution of parsed string. - """ - if type(arg) is not str: - # GH#45580 np.str_ satisfies isinstance(obj, str) but if we annotate - # arg as "str" this raises here - if not isinstance(arg, np.str_): - raise TypeError( - "Argument 'arg' has incorrect type " - f"(expected str, got {type(arg).__name__})" - ) - arg = str(arg) - if is_offset_object(freq): - freq = freq.rule_code + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil + """ if dayfirst is None: dayfirst = get_option("display.date_dayfirst") if yearfirst is None: yearfirst = get_option("display.date_yearfirst") - res = parse_datetime_string_with_reso(arg, freq=freq, - dayfirst=dayfirst, - yearfirst=yearfirst) - return res - - -cdef parse_datetime_string_with_reso( - str date_string, str freq=None, bint dayfirst=False, bint yearfirst=False, -): - """ - Parse datetime string and try to identify its resolution. - - Returns - ------- - datetime - str - Inferred resolution of the parsed string. - - Raises - ------ - ValueError : preliminary check suggests string is not datetime - DateParseError : error within dateutil - """ cdef: - object parsed, reso + datetime parsed + str reso bint string_to_dts_failed npy_datetimestruct dts NPY_DATETIMEUNIT out_bestunit @@ -483,7 +455,7 @@ cpdef bint _does_string_look_like_datetime(str py_string): cdef object _parse_dateabbr_string(object date_string, datetime default, str freq=None): cdef: - object ret + datetime ret # year initialized to prevent compiler warnings int year = -1, quarter = -1, month Py_ssize_t date_len @@ -505,8 +477,8 @@ cdef object _parse_dateabbr_string(object date_string, datetime default, except ValueError: pass - try: - if 4 <= date_len <= 7: + if 4 <= date_len <= 7: + try: i = date_string.index("Q", 1, 6) if i == 1: quarter = int(date_string[0]) @@ -553,10 +525,11 @@ cdef object _parse_dateabbr_string(object date_string, datetime default, ret = default.replace(year=year, month=month) return ret, "quarter" - except DateParseError: - raise - except ValueError: - pass + except DateParseError: + raise + except ValueError: + # e.g. if "Q" is not in date_string and .index raised + pass if date_len == 6 and freq == "M": year = int(date_string[:4]) @@ -564,8 +537,9 @@ cdef object _parse_dateabbr_string(object date_string, datetime default, try: ret = default.replace(year=year, month=month) return ret, "month" - except ValueError: - pass + except ValueError as err: + # We can infer that none of the patterns below will match + raise ValueError(f"Unable to parse {date_string}") from err for pat in ["%Y-%m", "%b %Y", "%b-%Y"]: try: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f93afc0d1c3f2..64bd76adb0ae2 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -88,7 +88,7 @@ from pandas._libs.tslibs.dtypes cimport ( ) from pandas._libs.tslibs.parsing cimport quarter_to_myear -from pandas._libs.tslibs.parsing import parse_time_string +from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -2589,7 +2589,9 @@ class Period(_Period): value = str(value) value = value.upper() - dt, reso = parse_time_string(value, freq) + + freqstr = freq.rule_code if freq is not None else None + dt, reso = parse_datetime_string_with_reso(value, freqstr) try: ts = Timestamp(value) except ValueError: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 002b9832e9e6e..ea0c93e75f496 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -241,7 +241,18 @@ def _parse_with_reso(self, label: str): freq = self.freq except NotImplementedError: freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) - parsed, reso_str = parsing.parse_time_string(label, freq) + + freqstr: str | None + if freq is not None and not isinstance(freq, str): + freqstr = freq.rule_code + else: + freqstr = freq + + if isinstance(label, np.str_): + # GH#45580 + label = str(label) + + parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr) reso = Resolution.from_attrname(reso_str) return parsed, reso diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index cb65ecf411118..50d0c649fffc2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -276,8 +276,9 @@ def test_loc_npstr(self): def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): # GH#20684 """ - parse_time_string return parameter if type not matched. - PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. + parse_datetime_string_with_reso return parameter if type not matched. + PeriodIndex.get_loc takes returned value from parse_datetime_string_with_reso + as a tuple. If first argument is Period and a tuple has 3 items, process go on not raise exception """ diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index dd1012d57d6bc..75cec7931edcc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1623,10 +1623,14 @@ def test_mixed_offsets_with_native_datetime_raises(self): "2015-03-14T16:15:14.123-08:00", "2019-03-04T21:56:32.620-07:00", None, + "today", + "now", ] ser = Series(vals) assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111 + now = Timestamp("now") + today = Timestamp("today") mixed = to_datetime(ser) expected = Series( [ @@ -1638,7 +1642,11 @@ def test_mixed_offsets_with_native_datetime_raises(self): ], dtype=object, ) - tm.assert_series_equal(mixed, expected) + tm.assert_series_equal(mixed[:-2], expected) + # we'll check mixed[-1] and mixed[-2] match now and today to within + # call-timing tolerances + assert (now - mixed.iloc[-1]).total_seconds() <= 0.1 + assert (today - mixed.iloc[-2]).total_seconds() <= 0.1 with pytest.raises(ValueError, match="Tz-aware datetime.datetime"): to_datetime(mixed) @@ -2901,7 +2909,9 @@ def test_parsers(self, date_str, expected, warning, cache): # https://github.com/dateutil/dateutil/issues/217 yearfirst = True - result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst) + result1, _ = parsing.parse_datetime_string_with_reso( + date_str, yearfirst=yearfirst + ) with tm.assert_produces_warning(warning, match="Could not infer format"): result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) @@ -2937,7 +2947,7 @@ def test_na_values_with_cache( def test_parsers_nat(self): # Test that each of several string-accepting methods return pd.NaT - result1, _ = parsing.parse_time_string("NaT") + result1, _ = parsing.parse_datetime_string_with_reso("NaT") result2 = to_datetime("NaT") result3 = Timestamp("NaT") result4 = DatetimeIndex(["NaT"])[0] @@ -3008,7 +3018,7 @@ def test_parsers_dayfirst_yearfirst( dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst) assert dateutil_result == expected - result1, _ = parsing.parse_time_string( + result1, _ = parsing.parse_datetime_string_with_reso( date_str, dayfirst=dayfirst, yearfirst=yearfirst ) @@ -3036,7 +3046,7 @@ def test_parsers_timestring(self, date_str, exp_def): # must be the same as dateutil result exp_now = parse(date_str) - result1, _ = parsing.parse_time_string(date_str) + result1, _ = parsing.parse_datetime_string_with_reso(date_str) with tm.assert_produces_warning(UserWarning, match="Could not infer format"): result2 = to_datetime(date_str) result3 = to_datetime([date_str]) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index c3a989cee7b02..33fce7b351513 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -12,31 +12,31 @@ parsing, strptime, ) -from pandas._libs.tslibs.parsing import parse_time_string +from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso import pandas.util._test_decorators as td import pandas._testing as tm -def test_parse_time_string(): - (parsed, reso) = parse_time_string("4Q1984") - (parsed_lower, reso_lower) = parse_time_string("4q1984") +def test_parse_datetime_string_with_reso(): + (parsed, reso) = parse_datetime_string_with_reso("4Q1984") + (parsed_lower, reso_lower) = parse_datetime_string_with_reso("4q1984") assert reso == reso_lower assert parsed == parsed_lower -def test_parse_time_string_nanosecond_reso(): +def test_parse_datetime_string_with_reso_nanosecond_reso(): # GH#46811 - parsed, reso = parse_time_string("2022-04-20 09:19:19.123456789") + parsed, reso = parse_datetime_string_with_reso("2022-04-20 09:19:19.123456789") assert reso == "nanosecond" -def test_parse_time_string_invalid_type(): +def test_parse_datetime_string_with_reso_invalid_type(): # Raise on invalid input, don't just return it - msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + msg = "Argument 'date_string' has incorrect type (expected str, got tuple)" with pytest.raises(TypeError, match=re.escape(msg)): - parse_time_string((4, 5)) + parse_datetime_string_with_reso((4, 5)) @pytest.mark.parametrize( @@ -44,8 +44,8 @@ def test_parse_time_string_invalid_type(): ) def test_parse_time_quarter_with_dash(dashed, normal): # see gh-9688 - (parsed_dash, reso_dash) = parse_time_string(dashed) - (parsed, reso) = parse_time_string(normal) + (parsed_dash, reso_dash) = parse_datetime_string_with_reso(dashed) + (parsed, reso) = parse_datetime_string_with_reso(normal) assert parsed_dash == parsed assert reso_dash == reso @@ -56,7 +56,7 @@ def test_parse_time_quarter_with_dash_error(dashed): msg = f"Unknown datetime string format, unable to parse: {dashed}" with pytest.raises(parsing.DateParseError, match=msg): - parse_time_string(dashed) + parse_datetime_string_with_reso(dashed) @pytest.mark.parametrize( @@ -103,7 +103,7 @@ def test_does_not_convert_mixed_integer(date_string, expected): ) def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg): with pytest.raises(parsing.DateParseError, match=msg): - parsing.parse_time_string(date_str, **kwargs) + parsing.parse_datetime_string_with_reso(date_str, **kwargs) @pytest.mark.parametrize( @@ -115,7 +115,7 @@ def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg): ], ) def test_parsers_quarterly_with_freq(date_str, freq, expected): - result, _ = parsing.parse_time_string(date_str, freq=freq) + result, _ = parsing.parse_datetime_string_with_reso(date_str, freq=freq) assert result == expected @@ -132,7 +132,7 @@ def test_parsers_quarter_invalid(date_str): msg = f"Unknown datetime string format, unable to parse: {date_str}" with pytest.raises(ValueError, match=msg): - parsing.parse_time_string(date_str) + parsing.parse_datetime_string_with_reso(date_str) @pytest.mark.parametrize( @@ -140,7 +140,7 @@ def test_parsers_quarter_invalid(date_str): [("201101", datetime(2011, 1, 1, 0, 0)), ("200005", datetime(2000, 5, 1, 0, 0))], ) def test_parsers_month_freq(date_str, expected): - result, _ = parsing.parse_time_string(date_str, freq="M") + result, _ = parsing.parse_datetime_string_with_reso(date_str, freq="M") assert result == expected @@ -284,13 +284,13 @@ def test_try_parse_dates(): tm.assert_numpy_array_equal(result, expected) -def test_parse_time_string_check_instance_type_raise_exception(): +def test_parse_datetime_string_with_reso_check_instance_type_raise_exception(): # issue 20684 - msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + msg = "Argument 'date_string' has incorrect type (expected str, got tuple)" with pytest.raises(TypeError, match=re.escape(msg)): - parse_time_string((1, 2, 3)) + parse_datetime_string_with_reso((1, 2, 3)) - result = parse_time_string("2019") + result = parse_datetime_string_with_reso("2019") expected = (datetime(2019, 1, 1), "year") assert result == expected