Skip to content

Commit 0e6f338

Browse files
authored
REF: stricter typing, better naming in parsing.pyx (#50722)
* REF: stronger typing in parsing.pyx
* REF: handle now and today outside parse_datetime_string
* mypy fixup
1 parent 4ff1f3e commit 0e6f338

File tree

8 files changed

+97
-91
lines changed

8 files changed

+97
-91
lines changed

pandas/_libs/tslib.pyx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,15 @@ cdef _array_to_datetime_object(
770770
oresult[i] = "NaT"
771771
cnp.PyArray_MultiIter_NEXT(mi)
772772
continue
773+
elif val == "now":
774+
oresult[i] = datetime.now()
775+
cnp.PyArray_MultiIter_NEXT(mi)
776+
continue
777+
elif val == "today":
778+
oresult[i] = datetime.today()
779+
cnp.PyArray_MultiIter_NEXT(mi)
780+
continue
781+
773782
try:
774783
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
775784
yearfirst=yearfirst)

pandas/_libs/tslibs/parsing.pyi

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ from datetime import datetime
22

33
import numpy as np
44

5-
from pandas._libs.tslibs.offsets import BaseOffset
65
from pandas._typing import npt
76

87
class DateParseError(ValueError): ...
@@ -12,9 +11,9 @@ def parse_datetime_string(
1211
dayfirst: bool = ...,
1312
yearfirst: bool = ...,
1413
) -> datetime: ...
15-
def parse_time_string(
16-
arg: str,
17-
freq: BaseOffset | str | None = ...,
14+
def parse_datetime_string_with_reso(
15+
date_string: str,
16+
freq: str | None = ...,
1817
dayfirst: bool | None = ...,
1918
yearfirst: bool | None = ...,
2019
) -> tuple[datetime, str]: ...

pandas/_libs/tslibs/parsing.pyx

Lines changed: 31 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ from pandas._libs.tslibs.np_datetime cimport (
5959
npy_datetimestruct,
6060
string_to_dts,
6161
)
62-
from pandas._libs.tslibs.offsets cimport is_offset_object
6362
from pandas._libs.tslibs.strptime import array_strptime
6463
from pandas._libs.tslibs.util cimport (
6564
get_c_string_buf_and_size,
@@ -257,6 +256,10 @@ def parse_datetime_string(
257256
Returns
258257
-------
259258
datetime
259+
260+
Notes
261+
-----
262+
Does not handle "today" or "now"; the caller is responsible for handling those.
260263
"""
261264

262265
cdef:
@@ -275,14 +278,6 @@ def parse_datetime_string(
275278
if dt is not None:
276279
return dt
277280

278-
# Handling special case strings today & now
279-
if date_string == "now":
280-
dt = datetime.now()
281-
return dt
282-
elif date_string == "today":
283-
dt = datetime.today()
284-
return dt
285-
286281
try:
287282
dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None)
288283
return dt
@@ -321,16 +316,22 @@ def parse_datetime_string(
321316
return dt
322317

323318

324-
def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
319+
def parse_datetime_string_with_reso(
320+
str date_string, str freq=None, dayfirst=None, yearfirst=None
321+
):
322+
# NB: This will break with np.str_ (GH#45580) even though
323+
# isinstance(npstrobj, str) evaluates to True, so caller must ensure
324+
# the argument is *exactly* 'str'
325325
"""
326326
Try hard to parse datetime string, leveraging dateutil plus some extra
327327
goodies like quarter recognition.
328328
329329
Parameters
330330
----------
331-
arg : str
332-
freq : str or DateOffset, default None
331+
date_string : str
332+
freq : str or None, default None
333333
Helps with interpreting time string if supplied
334+
Corresponds to `offset.rule_code`
334335
dayfirst : bool, default None
335336
If None uses default from print_config
336337
yearfirst : bool, default None
@@ -341,50 +342,21 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
341342
datetime
342343
str
343344
Describing resolution of parsed string.
344-
"""
345-
if type(arg) is not str:
346-
# GH#45580 np.str_ satisfies isinstance(obj, str) but if we annotate
347-
# arg as "str" this raises here
348-
if not isinstance(arg, np.str_):
349-
raise TypeError(
350-
"Argument 'arg' has incorrect type "
351-
f"(expected str, got {type(arg).__name__})"
352-
)
353-
arg = str(arg)
354345
355-
if is_offset_object(freq):
356-
freq = freq.rule_code
346+
Raises
347+
------
348+
ValueError : preliminary check suggests string is not datetime
349+
DateParseError : error within dateutil
350+
"""
357351

358352
if dayfirst is None:
359353
dayfirst = get_option("display.date_dayfirst")
360354
if yearfirst is None:
361355
yearfirst = get_option("display.date_yearfirst")
362356

363-
res = parse_datetime_string_with_reso(arg, freq=freq,
364-
dayfirst=dayfirst,
365-
yearfirst=yearfirst)
366-
return res
367-
368-
369-
cdef parse_datetime_string_with_reso(
370-
str date_string, str freq=None, bint dayfirst=False, bint yearfirst=False,
371-
):
372-
"""
373-
Parse datetime string and try to identify its resolution.
374-
375-
Returns
376-
-------
377-
datetime
378-
str
379-
Inferred resolution of the parsed string.
380-
381-
Raises
382-
------
383-
ValueError : preliminary check suggests string is not datetime
384-
DateParseError : error within dateutil
385-
"""
386357
cdef:
387-
object parsed, reso
358+
datetime parsed
359+
str reso
388360
bint string_to_dts_failed
389361
npy_datetimestruct dts
390362
NPY_DATETIMEUNIT out_bestunit
@@ -496,7 +468,7 @@ cpdef bint _does_string_look_like_datetime(str py_string):
496468
cdef object _parse_dateabbr_string(object date_string, datetime default,
497469
str freq=None):
498470
cdef:
499-
object ret
471+
datetime ret
500472
# year initialized to prevent compiler warnings
501473
int year = -1, quarter = -1, month
502474
Py_ssize_t date_len
@@ -518,8 +490,8 @@ cdef object _parse_dateabbr_string(object date_string, datetime default,
518490
except ValueError:
519491
pass
520492

521-
try:
522-
if 4 <= date_len <= 7:
493+
if 4 <= date_len <= 7:
494+
try:
523495
i = date_string.index("Q", 1, 6)
524496
if i == 1:
525497
quarter = int(date_string[0])
@@ -566,19 +538,21 @@ cdef object _parse_dateabbr_string(object date_string, datetime default,
566538
ret = default.replace(year=year, month=month)
567539
return ret, "quarter"
568540

569-
except DateParseError:
570-
raise
571-
except ValueError:
572-
pass
541+
except DateParseError:
542+
raise
543+
except ValueError:
544+
# e.g. if "Q" is not in date_string and .index raised
545+
pass
573546

574547
if date_len == 6 and freq == "M":
575548
year = int(date_string[:4])
576549
month = int(date_string[4:6])
577550
try:
578551
ret = default.replace(year=year, month=month)
579552
return ret, "month"
580-
except ValueError:
581-
pass
553+
except ValueError as err:
554+
# We can infer that none of the patterns below will match
555+
raise ValueError(f"Unable to parse {date_string}") from err
582556

583557
for pat in ["%Y-%m", "%b %Y", "%b-%Y"]:
584558
try:

pandas/_libs/tslibs/period.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ from pandas._libs.tslibs.dtypes cimport (
8888
)
8989
from pandas._libs.tslibs.parsing cimport quarter_to_myear
9090

91-
from pandas._libs.tslibs.parsing import parse_time_string
91+
from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso
9292

9393
from pandas._libs.tslibs.nattype cimport (
9494
NPY_NAT,
@@ -2589,7 +2589,9 @@ class Period(_Period):
25892589

25902590
value = str(value)
25912591
value = value.upper()
2592-
dt, reso = parse_time_string(value, freq)
2592+
2593+
freqstr = freq.rule_code if freq is not None else None
2594+
dt, reso = parse_datetime_string_with_reso(value, freqstr)
25932595
try:
25942596
ts = Timestamp(value)
25952597
except ValueError:

pandas/core/indexes/datetimelike.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,18 @@ def _parse_with_reso(self, label: str):
241241
freq = self.freq
242242
except NotImplementedError:
243243
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
244-
parsed, reso_str = parsing.parse_time_string(label, freq)
244+
245+
freqstr: str | None
246+
if freq is not None and not isinstance(freq, str):
247+
freqstr = freq.rule_code
248+
else:
249+
freqstr = freq
250+
251+
if isinstance(label, np.str_):
252+
# GH#45580
253+
label = str(label)
254+
255+
parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr)
245256
reso = Resolution.from_attrname(reso_str)
246257
return parsed, reso
247258

pandas/tests/indexing/test_loc.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,9 @@ def test_loc_npstr(self):
276276
def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
277277
# GH#20684
278278
"""
279-
parse_time_string return parameter if type not matched.
280-
PeriodIndex.get_loc takes returned value from parse_time_string as a tuple.
279+
parse_datetime_string_with_reso returns its parameter if the type is not matched.
280+
PeriodIndex.get_loc takes returned value from parse_datetime_string_with_reso
281+
as a tuple.
281282
If the first argument is a Period and the tuple has 3 items,
282283
processing goes on and does not raise an exception.
283284
"""

pandas/tests/tools/test_to_datetime.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,10 +1625,14 @@ def test_mixed_offsets_with_native_datetime_raises(self):
16251625
"2015-03-14T16:15:14.123-08:00",
16261626
"2019-03-04T21:56:32.620-07:00",
16271627
None,
1628+
"today",
1629+
"now",
16281630
]
16291631
ser = Series(vals)
16301632
assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111
16311633

1634+
now = Timestamp("now")
1635+
today = Timestamp("today")
16321636
mixed = to_datetime(ser)
16331637
expected = Series(
16341638
[
@@ -1640,7 +1644,11 @@ def test_mixed_offsets_with_native_datetime_raises(self):
16401644
],
16411645
dtype=object,
16421646
)
1643-
tm.assert_series_equal(mixed, expected)
1647+
tm.assert_series_equal(mixed[:-2], expected)
1648+
# we'll check mixed[-1] and mixed[-2] match now and today to within
1649+
# call-timing tolerances
1650+
assert (now - mixed.iloc[-1]).total_seconds() <= 0.1
1651+
assert (today - mixed.iloc[-2]).total_seconds() <= 0.1
16441652

16451653
with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
16461654
to_datetime(mixed)
@@ -2903,7 +2911,9 @@ def test_parsers(self, date_str, expected, warning, cache):
29032911
# https://github.com/dateutil/dateutil/issues/217
29042912
yearfirst = True
29052913

2906-
result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst)
2914+
result1, _ = parsing.parse_datetime_string_with_reso(
2915+
date_str, yearfirst=yearfirst
2916+
)
29072917
with tm.assert_produces_warning(warning, match="Could not infer format"):
29082918
result2 = to_datetime(date_str, yearfirst=yearfirst)
29092919
result3 = to_datetime([date_str], yearfirst=yearfirst)
@@ -2939,7 +2949,7 @@ def test_na_values_with_cache(
29392949

29402950
def test_parsers_nat(self):
29412951
# Test that each of several string-accepting methods return pd.NaT
2942-
result1, _ = parsing.parse_time_string("NaT")
2952+
result1, _ = parsing.parse_datetime_string_with_reso("NaT")
29432953
result2 = to_datetime("NaT")
29442954
result3 = Timestamp("NaT")
29452955
result4 = DatetimeIndex(["NaT"])[0]
@@ -3010,7 +3020,7 @@ def test_parsers_dayfirst_yearfirst(
30103020
dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst)
30113021
assert dateutil_result == expected
30123022

3013-
result1, _ = parsing.parse_time_string(
3023+
result1, _ = parsing.parse_datetime_string_with_reso(
30143024
date_str, dayfirst=dayfirst, yearfirst=yearfirst
30153025
)
30163026

@@ -3038,7 +3048,7 @@ def test_parsers_timestring(self, date_str, exp_def):
30383048
# must be the same as dateutil result
30393049
exp_now = parse(date_str)
30403050

3041-
result1, _ = parsing.parse_time_string(date_str)
3051+
result1, _ = parsing.parse_datetime_string_with_reso(date_str)
30423052
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
30433053
result2 = to_datetime(date_str)
30443054
result3 = to_datetime([date_str])

0 commit comments

Comments
 (0)