Skip to content

REF: stricter typing, better naming in parsing.pyx #50722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,15 @@ cdef _array_to_datetime_object(
oresult[i] = "NaT"
cnp.PyArray_MultiIter_NEXT(mi)
continue
elif val == "now":
oresult[i] = datetime.now()
cnp.PyArray_MultiIter_NEXT(mi)
continue
elif val == "today":
oresult[i] = datetime.today()
cnp.PyArray_MultiIter_NEXT(mi)
continue

try:
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
yearfirst=yearfirst)
Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/tslibs/parsing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ from datetime import datetime

import numpy as np

from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import npt

class DateParseError(ValueError): ...
Expand All @@ -12,9 +11,9 @@ def parse_datetime_string(
dayfirst: bool = ...,
yearfirst: bool = ...,
) -> datetime: ...
def parse_time_string(
arg: str,
freq: BaseOffset | str | None = ...,
def parse_datetime_string_with_reso(
date_string: str,
freq: str | None = ...,
dayfirst: bool | None = ...,
yearfirst: bool | None = ...,
) -> tuple[datetime, str]: ...
Expand Down
88 changes: 31 additions & 57 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct,
string_to_dts,
)
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.strptime import array_strptime
from pandas._libs.tslibs.util cimport (
get_c_string_buf_and_size,
Expand Down Expand Up @@ -257,6 +256,10 @@ def parse_datetime_string(
Returns
-------
datetime

Notes
-----
Does not handle "today" or "now", which caller is responsible for handling.
"""

cdef:
Expand All @@ -275,14 +278,6 @@ def parse_datetime_string(
if dt is not None:
return dt

# Handling special case strings today & now
if date_string == "now":
dt = datetime.now()
return dt
elif date_string == "today":
dt = datetime.today()
return dt

try:
dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None)
return dt
Expand All @@ -308,16 +303,22 @@ def parse_datetime_string(
return dt


def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
def parse_datetime_string_with_reso(
str date_string, str freq=None, dayfirst=None, yearfirst=None
):
# NB: This will break with np.str_ (GH#45580) even though
# isinstance(npstrobj, str) evaluates to True, so caller must ensure
# the argument is *exactly* 'str'
"""
Try hard to parse datetime string, leveraging dateutil plus some extra
goodies like quarter recognition.

Parameters
----------
arg : str
freq : str or DateOffset, default None
date_string : str
freq : str or None, default None
Helps with interpreting time string if supplied
Corresponds to `offset.rule_code`
dayfirst : bool, default None
If None uses default from print_config
yearfirst : bool, default None
Expand All @@ -328,50 +329,21 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
datetime
str
Describing resolution of parsed string.
"""
if type(arg) is not str:
# GH#45580 np.str_ satisfies isinstance(obj, str) but if we annotate
# arg as "str" this raises here
if not isinstance(arg, np.str_):
raise TypeError(
"Argument 'arg' has incorrect type "
f"(expected str, got {type(arg).__name__})"
)
arg = str(arg)

if is_offset_object(freq):
freq = freq.rule_code
Raises
------
ValueError : preliminary check suggests string is not datetime
DateParseError : error within dateutil
"""

if dayfirst is None:
dayfirst = get_option("display.date_dayfirst")
if yearfirst is None:
yearfirst = get_option("display.date_yearfirst")

res = parse_datetime_string_with_reso(arg, freq=freq,
dayfirst=dayfirst,
yearfirst=yearfirst)
return res


cdef parse_datetime_string_with_reso(
str date_string, str freq=None, bint dayfirst=False, bint yearfirst=False,
):
"""
Parse datetime string and try to identify its resolution.

Returns
-------
datetime
str
Inferred resolution of the parsed string.

Raises
------
ValueError : preliminary check suggests string is not datetime
DateParseError : error within dateutil
"""
cdef:
object parsed, reso
datetime parsed
str reso
bint string_to_dts_failed
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
Expand Down Expand Up @@ -483,7 +455,7 @@ cpdef bint _does_string_look_like_datetime(str py_string):
cdef object _parse_dateabbr_string(object date_string, datetime default,
str freq=None):
cdef:
object ret
datetime ret
# year initialized to prevent compiler warnings
int year = -1, quarter = -1, month
Py_ssize_t date_len
Expand All @@ -505,8 +477,8 @@ cdef object _parse_dateabbr_string(object date_string, datetime default,
except ValueError:
pass

try:
if 4 <= date_len <= 7:
if 4 <= date_len <= 7:
try:
i = date_string.index("Q", 1, 6)
if i == 1:
quarter = int(date_string[0])
Expand Down Expand Up @@ -553,19 +525,21 @@ cdef object _parse_dateabbr_string(object date_string, datetime default,
ret = default.replace(year=year, month=month)
return ret, "quarter"

except DateParseError:
raise
except ValueError:
pass
except DateParseError:
raise
except ValueError:
# e.g. if "Q" is not in date_string and .index raised
pass

if date_len == 6 and freq == "M":
year = int(date_string[:4])
month = int(date_string[4:6])
try:
ret = default.replace(year=year, month=month)
return ret, "month"
except ValueError:
pass
except ValueError as err:
# We can infer that none of the patterns below will match
raise ValueError(f"Unable to parse {date_string}") from err

for pat in ["%Y-%m", "%b %Y", "%b-%Y"]:
try:
Expand Down
6 changes: 4 additions & 2 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ from pandas._libs.tslibs.dtypes cimport (
)
from pandas._libs.tslibs.parsing cimport quarter_to_myear

from pandas._libs.tslibs.parsing import parse_time_string
from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso

from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
Expand Down Expand Up @@ -2589,7 +2589,9 @@ class Period(_Period):

value = str(value)
value = value.upper()
dt, reso = parse_time_string(value, freq)

freqstr = freq.rule_code if freq is not None else None
dt, reso = parse_datetime_string_with_reso(value, freqstr)
try:
ts = Timestamp(value)
except ValueError:
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,18 @@ def _parse_with_reso(self, label: str):
freq = self.freq
except NotImplementedError:
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
parsed, reso_str = parsing.parse_time_string(label, freq)

freqstr: str | None
if freq is not None and not isinstance(freq, str):
freqstr = freq.rule_code
else:
freqstr = freq

if isinstance(label, np.str_):
# GH#45580
label = str(label)

parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr)
reso = Resolution.from_attrname(reso_str)
return parsed, reso

Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,9 @@ def test_loc_npstr(self):
def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
# GH#20684
"""
parse_time_string return parameter if type not matched.
PeriodIndex.get_loc takes returned value from parse_time_string as a tuple.
parse_datetime_string_with_reso returns the parameter if the type is not matched.
PeriodIndex.get_loc takes the value returned from
parse_datetime_string_with_reso as a tuple.
If the first argument is a Period and the tuple has 3 items,
the process goes on and does not raise an exception.
"""
Expand Down
20 changes: 15 additions & 5 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1623,10 +1623,14 @@ def test_mixed_offsets_with_native_datetime_raises(self):
"2015-03-14T16:15:14.123-08:00",
"2019-03-04T21:56:32.620-07:00",
None,
"today",
"now",
]
ser = Series(vals)
assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111

now = Timestamp("now")
today = Timestamp("today")
mixed = to_datetime(ser)
expected = Series(
[
Expand All @@ -1638,7 +1642,11 @@ def test_mixed_offsets_with_native_datetime_raises(self):
],
dtype=object,
)
tm.assert_series_equal(mixed, expected)
tm.assert_series_equal(mixed[:-2], expected)
# we'll check mixed[-1] and mixed[-2] match now and today to within
# call-timing tolerances
assert (now - mixed.iloc[-1]).total_seconds() <= 0.1
assert (today - mixed.iloc[-2]).total_seconds() <= 0.1

with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
to_datetime(mixed)
Expand Down Expand Up @@ -2901,7 +2909,9 @@ def test_parsers(self, date_str, expected, warning, cache):
# https://github.com/dateutil/dateutil/issues/217
yearfirst = True

result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst)
result1, _ = parsing.parse_datetime_string_with_reso(
date_str, yearfirst=yearfirst
)
with tm.assert_produces_warning(warning, match="Could not infer format"):
result2 = to_datetime(date_str, yearfirst=yearfirst)
result3 = to_datetime([date_str], yearfirst=yearfirst)
Expand Down Expand Up @@ -2937,7 +2947,7 @@ def test_na_values_with_cache(

def test_parsers_nat(self):
# Test that each of several string-accepting methods return pd.NaT
result1, _ = parsing.parse_time_string("NaT")
result1, _ = parsing.parse_datetime_string_with_reso("NaT")
result2 = to_datetime("NaT")
result3 = Timestamp("NaT")
result4 = DatetimeIndex(["NaT"])[0]
Expand Down Expand Up @@ -3008,7 +3018,7 @@ def test_parsers_dayfirst_yearfirst(
dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst)
assert dateutil_result == expected

result1, _ = parsing.parse_time_string(
result1, _ = parsing.parse_datetime_string_with_reso(
date_str, dayfirst=dayfirst, yearfirst=yearfirst
)

Expand Down Expand Up @@ -3036,7 +3046,7 @@ def test_parsers_timestring(self, date_str, exp_def):
# must be the same as dateutil result
exp_now = parse(date_str)

result1, _ = parsing.parse_time_string(date_str)
result1, _ = parsing.parse_datetime_string_with_reso(date_str)
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
result2 = to_datetime(date_str)
result3 = to_datetime([date_str])
Expand Down
Loading