Skip to content

Commit b23498e

Browse files
authored
REF: move to_offset to liboffsets (#34420)
1 parent c9d183d commit b23498e

File tree

5 files changed

+236
-244
lines changed

5 files changed

+236
-244
lines changed

pandas/_libs/tslibs/frequencies.pyx

Lines changed: 6 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,17 @@
1-
import re
2-
31
cimport numpy as cnp
42
cnp.import_array()
53

64
from pandas._libs.tslibs.util cimport is_integer_object
75

86
from pandas._libs.tslibs.offsets cimport is_offset_object
9-
10-
# ----------------------------------------------------------------------
11-
# Constants
12-
13-
# hack to handle WOM-1MON
14-
opattern = re.compile(
15-
r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
7+
from pandas._libs.tslibs.offsets import (
8+
INVALID_FREQ_ERR_MSG,
9+
_dont_uppercase,
10+
_lite_rule_alias,
11+
base_and_stride,
12+
opattern,
1613
)
1714

18-
INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
19-
2015
# ---------------------------------------------------------------------
2116
# Period codes
2217

@@ -103,27 +98,6 @@ _period_code_map.update({
10398
"W": 4000, # Weekly
10499
"C": 5000}) # Custom Business Day
105100

106-
_lite_rule_alias = {
107-
'W': 'W-SUN',
108-
'Q': 'Q-DEC',
109-
110-
'A': 'A-DEC', # YearEnd(month=12),
111-
'Y': 'A-DEC',
112-
'AS': 'AS-JAN', # YearBegin(month=1),
113-
'YS': 'AS-JAN',
114-
'BA': 'BA-DEC', # BYearEnd(month=12),
115-
'BY': 'BA-DEC',
116-
'BAS': 'BAS-JAN', # BYearBegin(month=1),
117-
'BYS': 'BAS-JAN',
118-
119-
'Min': 'T',
120-
'min': 'T',
121-
'ms': 'L',
122-
'us': 'U',
123-
'ns': 'N'}
124-
125-
_dont_uppercase = {'MS', 'ms'}
126-
127101
# Map attribute-name resolutions to resolution abbreviations
128102
_attrname_to_abbrevs = {
129103
"year": "A",
@@ -223,36 +197,6 @@ cpdef get_freq_code(freqstr):
223197
return code, stride
224198

225199

226-
cpdef base_and_stride(str freqstr):
227-
"""
228-
Return base freq and stride info from string representation
229-
230-
Returns
231-
-------
232-
base : str
233-
stride : int
234-
235-
Examples
236-
--------
237-
_freq_and_stride('5Min') -> 'Min', 5
238-
"""
239-
groups = opattern.match(freqstr)
240-
241-
if not groups:
242-
raise ValueError(f"Could not evaluate {freqstr}")
243-
244-
stride = groups.group(1)
245-
246-
if len(stride):
247-
stride = int(stride)
248-
else:
249-
stride = 1
250-
251-
base = groups.group(2)
252-
253-
return base, stride
254-
255-
256200
cpdef _period_str_to_code(str freqstr):
257201
freqstr = _lite_rule_alias.get(freqstr, freqstr)
258202

pandas/_libs/tslibs/offsets.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
cdef to_offset(object obj)
1+
cpdef to_offset(object obj)
22
cdef bint is_offset_object(object obj)
33
cdef bint is_tick_object(object obj)

pandas/_libs/tslibs/offsets.pyx

Lines changed: 222 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import cython
22

33
import operator
4+
import re
45
import time
56
from typing import Any
67
import warnings
@@ -103,17 +104,6 @@ cdef bint is_tick_object(object obj):
103104
return isinstance(obj, Tick)
104105

105106

106-
cdef to_offset(object obj):
107-
"""
108-
Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime
109-
imports
110-
"""
111-
if isinstance(obj, BaseOffset):
112-
return obj
113-
from pandas.tseries.frequencies import to_offset
114-
return to_offset(obj)
115-
116-
117107
cdef datetime _as_datetime(datetime obj):
118108
if isinstance(obj, ABCTimestamp):
119109
return obj.to_pydatetime()
@@ -3505,6 +3495,9 @@ CBMonthEnd = CustomBusinessMonthEnd
35053495
CBMonthBegin = CustomBusinessMonthBegin
35063496
CDay = CustomBusinessDay
35073497

3498+
# ----------------------------------------------------------------------
3499+
# to_offset helpers
3500+
35083501
prefix_mapping = {
35093502
offset._prefix: offset
35103503
for offset in [
@@ -3542,6 +3535,224 @@ prefix_mapping = {
35423535
]
35433536
}
35443537

3538+
# Unit-sized offset for each field name that ``to_offset`` reads from
# ``Timedelta.components``; ``to_offset`` multiplies the entry by the
# component's value.
_name_to_offset_map = dict(
    days=Day(1),
    hours=Hour(1),
    minutes=Minute(1),
    seconds=Second(1),
    milliseconds=Milli(1),
    microseconds=Micro(1),
    nanoseconds=Nano(1),
)
3547+
3548+
# One frequency component: an optional signed integer/decimal stride, optional
# whitespace, then an alphabetic rule name.  The name may carry a "-"-led
# suffix of letters, digits and dashes -- the hack that lets "WOM-1MON" match
# as a single name.
opattern = re.compile(
    r"([+\-]?\d*|[+\-]?\d*\.\d*)"  # group 1: stride text (may be empty)
    r"\s*"
    r"([A-Za-z]+([\-][\dA-Za-z\-]+)?)"  # group 2: full name, group 3: suffix
)
3552+
3553+
# Shorthand rule names and the fuller rule names they are replaced with
# before an offset is looked up.
_lite_rule_alias = {
    # weekly / quarterly
    "W": "W-SUN",
    "Q": "Q-DEC",
    # annual and business-annual
    "A": "A-DEC",  # YearEnd(month=12),
    "Y": "A-DEC",
    "AS": "AS-JAN",  # YearBegin(month=1),
    "YS": "AS-JAN",
    "BA": "BA-DEC",  # BYearEnd(month=12),
    "BY": "BA-DEC",
    "BAS": "BAS-JAN",  # BYearBegin(month=1),
    "BYS": "BAS-JAN",
    # mixed-/lower-case sub-daily spellings -> single-letter codes
    "Min": "T",
    "min": "T",
    "ms": "L",
    "us": "U",
    "ns": "N",
}

# Names that ``_get_offset`` must not upper-case before resolving them.
_dont_uppercase = {"MS", "ms"}

# Template for the ValueError raised on an unrecognised frequency.
INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"

# TODO: still needed?
# cache of previously seen offsets, keyed by normalised rule name and
# filled in by ``_get_offset``
_offset_map = {}
3580+
3581+
3582+
cpdef base_and_stride(str freqstr):
    """
    Split a frequency string into its base name and integer stride.

    Only the leading match of ``opattern`` against ``freqstr`` is used.

    Parameters
    ----------
    freqstr : str

    Returns
    -------
    base : str
    stride : int
        1 when ``freqstr`` has no leading stride text.

    Raises
    ------
    ValueError
        If ``freqstr`` does not match ``opattern``, or if the matched stride
        text is rejected by ``int`` (e.g. a decimal such as "1.5").

    Examples
    --------
    base_and_stride('5Min') -> 'Min', 5
    """
    match = opattern.match(freqstr)
    if match is None:
        raise ValueError(f"Could not evaluate {freqstr}")

    stride_text = match.group(1)
    # An empty stride group means the string was just a bare rule name.
    stride = int(stride_text) if stride_text else 1

    return match.group(2), stride
3610+
3611+
3612+
# TODO: better name?
3613+
def _get_offset(name: str) -> BaseOffset:
    """
    Return the offset object registered for a rule name, caching the result.

    The name is normalised first.  A name listed in ``_dont_uppercase`` is
    only passed through ``_lite_rule_alias`` as given; any other name is
    upper-cased, resolved through ``_lite_rule_alias``, and then resolved once
    more using its lower-cased spelling.  The text before the first "-" of the
    normalised name selects a class from ``prefix_mapping`` and the remaining
    "-"-separated pieces are forwarded to that class's ``_from_name``.

    Parameters
    ----------
    name : str

    Returns
    -------
    BaseOffset
        The object stored in ``_offset_map`` for the normalised name.

    Raises
    ------
    ValueError
        If the prefix is not in ``prefix_mapping`` or ``_from_name`` raises
        ValueError, TypeError or KeyError for the suffix pieces.
    """
    if name in _dont_uppercase:
        name = _lite_rule_alias.get(name, name)
    else:
        upper = name.upper()
        upper = _lite_rule_alias.get(upper, upper)
        name = _lite_rule_alias.get(upper.lower(), upper)

    if name in _offset_map:
        return _offset_map[name]

    prefix, *suffix = name.split("-")
    try:
        # ``suffix`` is empty when the name has no "-"; per the original
        # note, too many "-" pieces surface as a TypeError from _from_name.
        offset = prefix_mapping[prefix]._from_name(*suffix)
    except (ValueError, TypeError, KeyError) as err:
        # bad prefix or suffix
        raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err

    _offset_map[name] = offset
    return offset
3642+
3643+
3644+
cpdef to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None

    Returns
    -------
    DateOffset or None

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset : Standard kind of date increment used for a date range.

    Notes
    -----
    For a string made of several components (e.g. "1D1H") the sign is taken
    from the first component's stride and applied to every component.

    Examples
    --------
    >>> to_offset("5min")
    <5 * Minutes>

    >>> to_offset("1D1H")
    <25 * Hours>

    >>> to_offset(("W", 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, "B"))
    <2 * BusinessDays>

    >>> to_offset(pd.Timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, BaseOffset):
        return freq

    if isinstance(freq, tuple):
        # Accept both (name, stride) and (stride, name).
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, str):
            name, stride = stride, name
        # Only the base name is kept; a stride embedded in the string itself
        # is discarded in favour of the tuple's stride.
        name, _ = base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        from .timedeltas import Timedelta

        delta = None
        freq = Timedelta(freq)
        try:
            # Sum one scaled unit offset per non-zero component.
            for name in freq.components._fields:
                stride = getattr(freq.components, name)
                offset = _name_to_offset_map[name]
                if stride != 0:
                    offset = stride * offset
                    delta = offset if delta is None else delta + offset
        except ValueError as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    else:
        # Imported once per call rather than once per parsed component.
        from .resolution import Resolution  # TODO: avoid runtime import

        delta = None
        stride_sign = None
        try:
            # opattern has three groups, so re.split yields repeating runs of
            # (separator, stride, name, suffix) followed by the trailing text.
            split = re.split(opattern, freq)
            if split[-1] != "" and not split[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = _lite_rule_alias.get(name) or name
                if stride_sign is None:
                    # Fixed by the first component only.
                    stride_sign = -1 if stride.startswith("-") else 1
                if not stride:
                    stride = 1

                if prefix in Resolution.reso_str_bump_map:
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                # Exact integer arithmetic; no round trip through float.
                offset = _get_offset(name) * (abs(stride) * stride_sign)
                delta = offset if delta is None else delta + offset
        except (ValueError, TypeError) as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    if delta is None:
        # Nothing was accumulated (e.g. no component matched, or every
        # timedelta component was zero).
        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))

    return delta
3755+
35453756

35463757
# ----------------------------------------------------------------------
35473758
# RelativeDelta Arithmetic

pandas/tests/tseries/offsets/test_offsets.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
get_freq_str,
1818
)
1919
import pandas._libs.tslibs.offsets as liboffsets
20-
from pandas._libs.tslibs.offsets import ApplyTypeError
20+
from pandas._libs.tslibs.offsets import ApplyTypeError, _get_offset, _offset_map
2121
import pandas.compat as compat
2222
from pandas.compat.numpy import np_datetime64_compat
2323
from pandas.errors import PerformanceWarning
@@ -27,7 +27,6 @@
2727
from pandas.core.series import Series
2828

2929
from pandas.io.pickle import read_pickle
30-
from pandas.tseries.frequencies import _get_offset, _offset_map
3130
from pandas.tseries.holiday import USFederalHolidayCalendar
3231
import pandas.tseries.offsets as offsets
3332
from pandas.tseries.offsets import (

0 commit comments

Comments
 (0)