Skip to content

REF: move to_offset to liboffsets #34420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 6 additions & 62 deletions pandas/_libs/tslibs/frequencies.pyx
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
import re

cimport numpy as cnp
cnp.import_array()

from pandas._libs.tslibs.util cimport is_integer_object

from pandas._libs.tslibs.offsets cimport is_offset_object

# ----------------------------------------------------------------------
# Constants

# hack to handle WOM-1MON
opattern = re.compile(
r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
from pandas._libs.tslibs.offsets import (
INVALID_FREQ_ERR_MSG,
_dont_uppercase,
_lite_rule_alias,
base_and_stride,
opattern,
)

INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"

# ---------------------------------------------------------------------
# Period codes

Expand Down Expand Up @@ -103,27 +98,6 @@ _period_code_map.update({
"W": 4000, # Weekly
"C": 5000}) # Custom Business Day

# Short frequency aliases mapped to the rule code they stand for.
# Anchored rules get an explicit default anchor (e.g. weekly -> Sunday,
# quarterly/annual -> December, year-begin -> January); the spelled-out
# sub-second/minute aliases map to their single-letter codes.
# Looked up via ``_lite_rule_alias.get(freqstr, freqstr)``, so names that are
# absent from the mapping pass through unchanged.
_lite_rule_alias = {
'W': 'W-SUN',
'Q': 'Q-DEC',

'A': 'A-DEC', # YearEnd(month=12),
'Y': 'A-DEC',
'AS': 'AS-JAN', # YearBegin(month=1),
'YS': 'AS-JAN',
'BA': 'BA-DEC', # BYearEnd(month=12),
'BY': 'BA-DEC',
'BAS': 'BAS-JAN', # BYearBegin(month=1),
'BYS': 'BAS-JAN',

'Min': 'T',
'min': 'T',
'ms': 'L',
'us': 'U',
'ns': 'N'}

# Names that differ only by case from another rule ('MS' vs 'ms');
# presumably these must not be upper-cased during normalization -- confirm
# against the callers that consult this set.
_dont_uppercase = {'MS', 'ms'}

# Map attribute-name resolutions to resolution abbreviations
_attrname_to_abbrevs = {
"year": "A",
Expand Down Expand Up @@ -223,36 +197,6 @@ cpdef get_freq_code(freqstr):
return code, stride


cpdef base_and_stride(str freqstr):
    """
    Return base freq and stride info from string representation

    Parameters
    ----------
    freqstr : str
        Frequency string such as ``'5Min'``; matched from its start
        against the module-level ``opattern`` regex.

    Returns
    -------
    base : str
        The alphabetic rule name (second regex group), e.g. ``'Min'``.
    stride : int
        The leading multiple; 1 when ``freqstr`` has no leading number.

    Raises
    ------
    ValueError
        If ``freqstr`` does not match ``opattern`` or its stride is not
        a valid integer (e.g. ``'1.5Min'`` or a bare sign).

    Examples
    --------
    base_and_stride('5Min') -> 'Min', 5
    """
    groups = opattern.match(freqstr)

    if not groups:
        raise ValueError(f"Could not evaluate {freqstr}")

    stride = groups.group(1)

    if stride:
        try:
            stride = int(stride)
        except ValueError as err:
            # opattern also admits decimals and a lone sign, which int()
            # rejects; report them with the same message as a non-match
            # so the offending frequency string is always named.
            raise ValueError(f"Could not evaluate {freqstr}") from err
    else:
        stride = 1

    base = groups.group(2)

    return base, stride


cpdef _period_str_to_code(str freqstr):
freqstr = _lite_rule_alias.get(freqstr, freqstr)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pxd
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
cdef to_offset(object obj)
cpdef to_offset(object obj)
cdef bint is_offset_object(object obj)
cdef bint is_tick_object(object obj)
233 changes: 222 additions & 11 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cython

import operator
import re
import time
from typing import Any
import warnings
Expand Down Expand Up @@ -103,17 +104,6 @@ cdef bint is_tick_object(object obj):
return isinstance(obj, Tick)


cdef to_offset(object obj):
    """
    Return ``obj`` unchanged when it is already a BaseOffset; otherwise
    delegate to pandas.tseries.frequencies.to_offset.

    The import happens at call time so that this runtime import is kept
    in a single, central place.
    """
    if not isinstance(obj, BaseOffset):
        from pandas.tseries.frequencies import to_offset as _py_to_offset
        return _py_to_offset(obj)
    return obj


cdef datetime _as_datetime(datetime obj):
if isinstance(obj, ABCTimestamp):
return obj.to_pydatetime()
Expand Down Expand Up @@ -3505,6 +3495,9 @@ CBMonthEnd = CustomBusinessMonthEnd
CBMonthBegin = CustomBusinessMonthBegin
CDay = CustomBusinessDay

# ----------------------------------------------------------------------
# to_offset helpers

prefix_mapping = {
offset._prefix: offset
for offset in [
Expand Down Expand Up @@ -3542,6 +3535,224 @@ prefix_mapping = {
]
}

# Unit-stride offsets keyed by name. to_offset looks these up with the field
# names of ``Timedelta.components`` when converting a timedelta.
_name_to_offset_map = {
"days": Day(1),
"hours": Hour(1),
"minutes": Minute(1),
"seconds": Second(1),
"milliseconds": Milli(1),
"microseconds": Micro(1),
"nanoseconds": Nano(1),
}

# Regex for one "<stride><name>" frequency component.
#   group 1: optional sign plus integer or decimal stride (may be empty)
#   group 2: alphabetic rule name, optionally followed by a "-suffix"
#   group 3: the "-suffix" part on its own
# The suffix accepts digits as well as letters:
# hack to handle WOM-1MON
opattern = re.compile(
r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)"
)

# Short frequency aliases mapped to the rule code they stand for; anchored
# rules get an explicit default anchor. Consulted by _get_offset and to_offset.
_lite_rule_alias = {
"W": "W-SUN",
"Q": "Q-DEC",

"A": "A-DEC", # YearEnd(month=12),
"Y": "A-DEC",
"AS": "AS-JAN", # YearBegin(month=1),
"YS": "AS-JAN",
"BA": "BA-DEC", # BYearEnd(month=12),
"BY": "BA-DEC",
"BAS": "BAS-JAN", # BYearBegin(month=1),
"BYS": "BAS-JAN",

"Min": "T",
"min": "T",
"ms": "L",
"us": "U",
"ns": "N",
}

# Names that _get_offset must not upper-case before lookup ("MS" and "ms"
# differ only by case).
_dont_uppercase = {"MS", "ms"}

# Message template for rejected frequencies; takes one positional argument.
INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"

# TODO: still needed?
# cache of previously seen offsets
# (filled by _get_offset, keyed by the normalized rule name)
_offset_map = {}


cpdef base_and_stride(str freqstr):
    """
    Return base freq and stride info from string representation

    Parameters
    ----------
    freqstr : str
        Frequency string such as ``'5Min'``; matched from its start
        against the module-level ``opattern`` regex.

    Returns
    -------
    base : str
        The alphabetic rule name (second regex group), e.g. ``'Min'``.
    stride : int
        The leading multiple; 1 when ``freqstr`` has no leading number.

    Raises
    ------
    ValueError
        If ``freqstr`` does not match ``opattern`` or its stride is not
        a valid integer (e.g. ``'1.5Min'`` or a bare sign).

    Examples
    --------
    base_and_stride('5Min') -> 'Min', 5
    """
    groups = opattern.match(freqstr)

    if not groups:
        raise ValueError(f"Could not evaluate {freqstr}")

    stride = groups.group(1)

    if stride:
        try:
            stride = int(stride)
        except ValueError as err:
            # opattern also admits decimals and a lone sign, which int()
            # rejects; report them with the same message as a non-match
            # so the offending frequency string is always named.
            raise ValueError(f"Could not evaluate {freqstr}") from err
    else:
        stride = 1

    base = groups.group(2)

    return base, stride


# TODO: better name?
def _get_offset(name: str) -> BaseOffset:
    """
    Look up (and cache) the offset object for a rule name.

    The name is normalized first: unless it is listed in
    ``_dont_uppercase`` it is upper-cased, and ``_lite_rule_alias`` is
    applied. The normalized name is then split on "-" into a prefix,
    resolved through ``prefix_mapping``, and optional suffix parts that
    are handed to the class's ``_from_name``.

    Raises
    ------
    ValueError
        If the prefix is unknown or the suffix is rejected.

    Examples
    --------
    _get_offset('EOM') --> BMonthEnd(1)
    """
    if name in _dont_uppercase:
        name = _lite_rule_alias.get(name, name)
    else:
        name = name.upper()
        name = _lite_rule_alias.get(name, name)
        name = _lite_rule_alias.get(name.lower(), name)

    # Serve repeated requests from the cache.
    if name in _offset_map:
        return _offset_map[name]

    prefix, *suffix_parts = name.split("-")
    try:
        offset_cls = prefix_mapping[prefix]
        # With no suffix this is a zero-argument call; too many "-" parts
        # surface as a TypeError from the call.
        result = offset_cls._from_name(*suffix_parts)
    except (ValueError, TypeError, KeyError) as err:
        # bad prefix or suffix
        raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err

    _offset_map[name] = result
    return result


cpdef to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None
        A tuple may be given as ``(name, stride)`` or ``(stride, name)``.
        A string may chain several "<stride><name>" components, which
        are summed; the sign of the first component is applied to all.

    Returns
    -------
    DateOffset or None
        ``None`` only when ``freq`` is ``None``; an offset passed in is
        returned unchanged.

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset : Standard kind of date increment used for a date range.

    Examples
    --------
    >>> to_offset("5min")
    <5 * Minutes>

    >>> to_offset("1D1H")
    <25 * Hours>

    >>> to_offset(("W", 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, "B"))
    <2 * BusinessDays>

    >>> to_offset(pd.Timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, BaseOffset):
        return freq

    if isinstance(freq, tuple):
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, str):
            # given as (stride, name): swap into (name, stride)
            name, stride = stride, name
        name, _ = base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        from .timedeltas import Timedelta

        delta = None
        freq = Timedelta(freq)
        try:
            # Sum one offset per non-zero component of the timedelta.
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except ValueError as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    else:
        # Imported once per call rather than once per parsed component.
        from .resolution import Resolution  # TODO: avoid runtime import

        delta = None
        stride_sign = None
        try:
            # opattern has three capture groups, so re.split yields
            # repeating runs of (separator, stride, name, suffix).
            split = re.split(opattern, freq)
            if split[-1] != "" and not split[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = _lite_rule_alias.get(name) or name
                if stride_sign is None:
                    # the sign of the first component governs the whole result
                    stride_sign = -1 if stride.startswith("-") else 1
                if not stride:
                    stride = 1

                if prefix in Resolution.reso_str_bump_map:
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = _get_offset(name)
                # Pure integer arithmetic: going through np.fabs would
                # convert to float64 and lose precision above 2**53.
                offset = offset * (abs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except (ValueError, TypeError) as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    if delta is None:
        # nothing was parsed (e.g. empty string or zero timedelta)
        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))

    return delta


# ----------------------------------------------------------------------
# RelativeDelta Arithmetic
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
get_freq_str,
)
import pandas._libs.tslibs.offsets as liboffsets
from pandas._libs.tslibs.offsets import ApplyTypeError
from pandas._libs.tslibs.offsets import ApplyTypeError, _get_offset, _offset_map
import pandas.compat as compat
from pandas.compat.numpy import np_datetime64_compat
from pandas.errors import PerformanceWarning
Expand All @@ -27,7 +27,6 @@
from pandas.core.series import Series

from pandas.io.pickle import read_pickle
from pandas.tseries.frequencies import _get_offset, _offset_map
from pandas.tseries.holiday import USFederalHolidayCalendar
import pandas.tseries.offsets as offsets
from pandas.tseries.offsets import (
Expand Down
Loading