diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 13edf3c46152a..8246e24319dbd 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -1,22 +1,17 @@ -import re - cimport numpy as cnp cnp.import_array() from pandas._libs.tslibs.util cimport is_integer_object from pandas._libs.tslibs.offsets cimport is_offset_object - -# ---------------------------------------------------------------------- -# Constants - -# hack to handle WOM-1MON -opattern = re.compile( - r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +from pandas._libs.tslibs.offsets import ( + INVALID_FREQ_ERR_MSG, + _dont_uppercase, + _lite_rule_alias, + base_and_stride, + opattern, ) -INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" - # --------------------------------------------------------------------- # Period codes @@ -103,27 +98,6 @@ _period_code_map.update({ "W": 4000, # Weekly "C": 5000}) # Custom Business Day -_lite_rule_alias = { - 'W': 'W-SUN', - 'Q': 'Q-DEC', - - 'A': 'A-DEC', # YearEnd(month=12), - 'Y': 'A-DEC', - 'AS': 'AS-JAN', # YearBegin(month=1), - 'YS': 'AS-JAN', - 'BA': 'BA-DEC', # BYearEnd(month=12), - 'BY': 'BA-DEC', - 'BAS': 'BAS-JAN', # BYearBegin(month=1), - 'BYS': 'BAS-JAN', - - 'Min': 'T', - 'min': 'T', - 'ms': 'L', - 'us': 'U', - 'ns': 'N'} - -_dont_uppercase = {'MS', 'ms'} - # Map attribute-name resolutions to resolution abbreviations _attrname_to_abbrevs = { "year": "A", @@ -223,36 +197,6 @@ cpdef get_freq_code(freqstr): return code, stride -cpdef base_and_stride(str freqstr): - """ - Return base freq and stride info from string representation - - Returns - ------- - base : str - stride : int - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError(f"Could not evaluate {freqstr}") - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - 
return base, stride - - cpdef _period_str_to_code(str freqstr): freqstr = _lite_rule_alias.get(freqstr, freqstr) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index e75cd8bdf1baf..69b878c77f0b8 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,3 +1,3 @@ -cdef to_offset(object obj) +cpdef to_offset(object obj) cdef bint is_offset_object(object obj) cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8e5634253bd39..a32ffb8aa3689 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,6 +1,7 @@ import cython import operator +import re import time from typing import Any import warnings @@ -103,17 +104,6 @@ cdef bint is_tick_object(object obj): return isinstance(obj, Tick) -cdef to_offset(object obj): - """ - Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime - imports - """ - if isinstance(obj, BaseOffset): - return obj - from pandas.tseries.frequencies import to_offset - return to_offset(obj) - - cdef datetime _as_datetime(datetime obj): if isinstance(obj, ABCTimestamp): return obj.to_pydatetime() @@ -3505,6 +3495,9 @@ CBMonthEnd = CustomBusinessMonthEnd CBMonthBegin = CustomBusinessMonthBegin CDay = CustomBusinessDay +# ---------------------------------------------------------------------- +# to_offset helpers + prefix_mapping = { offset._prefix: offset for offset in [ @@ -3542,6 +3535,224 @@ prefix_mapping = { ] } +_name_to_offset_map = { + "days": Day(1), + "hours": Hour(1), + "minutes": Minute(1), + "seconds": Second(1), + "milliseconds": Milli(1), + "microseconds": Micro(1), + "nanoseconds": Nano(1), +} + +# hack to handle WOM-1MON +opattern = re.compile( + r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)" +) + +_lite_rule_alias = { + "W": "W-SUN", + "Q": "Q-DEC", + + "A": "A-DEC", # YearEnd(month=12), + "Y": "A-DEC", + "AS": "AS-JAN", # YearBegin(month=1), + 
"YS": "AS-JAN", + "BA": "BA-DEC", # BYearEnd(month=12), + "BY": "BA-DEC", + "BAS": "BAS-JAN", # BYearBegin(month=1), + "BYS": "BAS-JAN", + + "Min": "T", + "min": "T", + "ms": "L", + "us": "U", + "ns": "N", +} + +_dont_uppercase = {"MS", "ms"} + +INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" + +# TODO: still needed? +# cache of previously seen offsets +_offset_map = {} + + +cpdef base_and_stride(str freqstr): + """ + Return base freq and stride info from string representation + + Returns + ------- + base : str + stride : int + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError(f"Could not evaluate {freqstr}") + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return base, stride + + +# TODO: better name? +def _get_offset(name: str) -> BaseOffset: + """ + Return DateOffset object associated with rule name. + + Examples + -------- + _get_offset('EOM') --> BMonthEnd(1) + """ + if name not in _dont_uppercase: + name = name.upper() + name = _lite_rule_alias.get(name, name) + name = _lite_rule_alias.get(name.lower(), name) + else: + name = _lite_rule_alias.get(name, name) + + if name not in _offset_map: + try: + split = name.split("-") + klass = prefix_mapping[split[0]] + # handles case where there's no suffix (and will TypeError if too + # many '-') + offset = klass._from_name(*split[1:]) + except (ValueError, TypeError, KeyError) as err: + # bad prefix or suffix + raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err + # cache + _offset_map[name] = offset + + return _offset_map[name] + + +cpdef to_offset(freq): + """ + Return DateOffset object from string or tuple representation + or datetime.timedelta object. 
+ + Parameters + ---------- + freq : str, tuple, datetime.timedelta, DateOffset or None + + Returns + ------- + DateOffset or None + + Raises + ------ + ValueError + If freq is an invalid frequency + + See Also + -------- + DateOffset : Standard kind of date increment used for a date range. + + Examples + -------- + >>> to_offset("5min") + <5 * Minutes> + + >>> to_offset("1D1H") + <25 * Hours> + + >>> to_offset(("W", 2)) + <2 * Weeks: weekday=6> + + >>> to_offset((2, "B")) + <2 * BusinessDays> + + >>> to_offset(pd.Timedelta(days=1)) + <Day> + + >>> to_offset(Hour()) + <Hour> + """ + if freq is None: + return None + + if isinstance(freq, BaseOffset): + return freq + + if isinstance(freq, tuple): + name = freq[0] + stride = freq[1] + if isinstance(stride, str): + name, stride = stride, name + name, _ = base_and_stride(name) + delta = _get_offset(name) * stride + + elif isinstance(freq, timedelta): + from .timedeltas import Timedelta + + delta = None + freq = Timedelta(freq) + try: + for name in freq.components._fields: + offset = _name_to_offset_map[name] + stride = getattr(freq.components, name) + if stride != 0: + offset = stride * offset + if delta is None: + delta = offset + else: + delta = delta + offset + except ValueError as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + + else: + delta = None + stride_sign = None + try: + split = re.split(opattern, freq) + if split[-1] != "" and not split[-1].isspace(): + # the last element must be blank + raise ValueError("last element must be blank") + for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]): + if sep != "" and not sep.isspace(): + raise ValueError("separator must be spaces") + prefix = _lite_rule_alias.get(name) or name + if stride_sign is None: + stride_sign = -1 if stride.startswith("-") else 1 + if not stride: + stride = 1 + + from .resolution import Resolution # TODO: avoid runtime import + + if prefix in Resolution.reso_str_bump_map: + stride, name = 
Resolution.get_stride_from_decimal( + float(stride), prefix + ) + stride = int(stride) + offset = _get_offset(name) + offset = offset * int(np.fabs(stride) * stride_sign) + if delta is None: + delta = offset + else: + delta = delta + offset + except (ValueError, TypeError) as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + + if delta is None: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) + + return delta + # ---------------------------------------------------------------------- # RelativeDelta Arithmetic diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 350bf8c38e6bf..86cc7ff753660 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -17,7 +17,7 @@ get_freq_str, ) import pandas._libs.tslibs.offsets as liboffsets -from pandas._libs.tslibs.offsets import ApplyTypeError +from pandas._libs.tslibs.offsets import ApplyTypeError, _get_offset, _offset_map import pandas.compat as compat from pandas.compat.numpy import np_datetime64_compat from pandas.errors import PerformanceWarning @@ -27,7 +27,6 @@ from pandas.core.series import Series from pandas.io.pickle import read_pickle -from pandas.tseries.frequencies import _get_offset, _offset_map from pandas.tseries.holiday import USFederalHolidayCalendar import pandas.tseries.offsets as offsets from pandas.tseries.offsets import ( diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 084ad4294f9d0..47ae66ac4f91b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,29 +1,21 @@ -from datetime import timedelta -import re -from typing import Dict, Optional +from typing import Optional import warnings import numpy as np from pandas._libs.algos import unique_deltas -from pandas._libs.tslibs import Timedelta, Timestamp +from pandas._libs.tslibs import Timestamp from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, 
MONTH_NUMBERS, int_to_weekday from pandas._libs.tslibs.fields import build_field_sarray -import pandas._libs.tslibs.frequencies as libfreqs -from pandas._libs.tslibs.offsets import ( +from pandas._libs.tslibs.offsets import ( # noqa:F401 DateOffset, Day, - Hour, - Micro, - Milli, - Minute, - Nano, - Second, + _get_offset, _offset_to_period_map, - prefix_mapping, + to_offset, ) from pandas._libs.tslibs.parsing import get_rule_month -from pandas._libs.tslibs.resolution import Resolution, month_position_check +from pandas._libs.tslibs.resolution import month_position_check from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.tzconversion import tz_convert from pandas.util._decorators import cache_readonly @@ -47,9 +39,6 @@ # --------------------------------------------------------------------- # Offset names ("time rules") and related functions -#: cache of previously seen offsets -_offset_map: Dict[str, DateOffset] = {} - def get_period_alias(offset_str: str) -> Optional[str]: """ @@ -58,126 +47,6 @@ def get_period_alias(offset_str: str) -> Optional[str]: return _offset_to_period_map.get(offset_str, None) -_name_to_offset_map = { - "days": Day(1), - "hours": Hour(1), - "minutes": Minute(1), - "seconds": Second(1), - "milliseconds": Milli(1), - "microseconds": Micro(1), - "nanoseconds": Nano(1), -} - - -def to_offset(freq) -> Optional[DateOffset]: - """ - Return DateOffset object from string or tuple representation - or datetime.timedelta object. - - Parameters - ---------- - freq : str, tuple, datetime.timedelta, DateOffset or None - - Returns - ------- - DateOffset - None if freq is None. - - Raises - ------ - ValueError - If freq is an invalid frequency - - See Also - -------- - DateOffset : Standard kind of date increment used for a date range. 
- - Examples - -------- - >>> to_offset("5min") - <5 * Minutes> - - >>> to_offset("1D1H") - <25 * Hours> - - >>> to_offset(("W", 2)) - <2 * Weeks: weekday=6> - - >>> to_offset((2, "B")) - <2 * BusinessDays> - - >>> to_offset(pd.Timedelta(days=1)) - <Day> - - >>> to_offset(Hour()) - <Hour> - """ - if freq is None: - return None - - if isinstance(freq, DateOffset): - return freq - - if isinstance(freq, tuple): - name = freq[0] - stride = freq[1] - if isinstance(stride, str): - name, stride = stride, name - name, _ = libfreqs.base_and_stride(name) - delta = _get_offset(name) * stride - - elif isinstance(freq, timedelta): - delta = None - freq = Timedelta(freq) - try: - for name in freq.components._fields: - offset = _name_to_offset_map[name] - stride = getattr(freq.components, name) - if stride != 0: - offset = stride * offset - if delta is None: - delta = offset - else: - delta = delta + offset - except ValueError as err: - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err - - else: - delta = None - stride_sign = None - try: - split = re.split(libfreqs.opattern, freq) - if split[-1] != "" and not split[-1].isspace(): - # the last element must be blank - raise ValueError("last element must be blank") - for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]): - if sep != "" and not sep.isspace(): - raise ValueError("separator must be spaces") - prefix = libfreqs._lite_rule_alias.get(name) or name - if stride_sign is None: - stride_sign = -1 if stride.startswith("-") else 1 - if not stride: - stride = 1 - if prefix in Resolution.reso_str_bump_map: - stride, name = Resolution.get_stride_from_decimal( - float(stride), prefix - ) - stride = int(stride) - offset = _get_offset(name) - offset = offset * int(np.fabs(stride) * stride_sign) - if delta is None: - delta = offset - else: - delta = delta + offset - except (ValueError, TypeError) as err: - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err - - if delta is None: - raise 
ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) - - return delta - - def get_offset(name: str) -> DateOffset: """ Return DateOffset object associated with rule name. @@ -197,37 +66,6 @@ def get_offset(name: str) -> DateOffset: return _get_offset(name) -def _get_offset(name: str) -> DateOffset: - """ - Return DateOffset object associated with rule name. - - Examples - -------- - _get_offset('EOM') --> BMonthEnd(1) - """ - if name not in libfreqs._dont_uppercase: - name = name.upper() - name = libfreqs._lite_rule_alias.get(name, name) - name = libfreqs._lite_rule_alias.get(name.lower(), name) - else: - name = libfreqs._lite_rule_alias.get(name, name) - - if name not in _offset_map: - try: - split = name.split("-") - klass = prefix_mapping[split[0]] - # handles case where there's no suffix (and will TypeError if too - # many '-') - offset = klass._from_name(*split[1:]) - except (ValueError, TypeError, KeyError) as err: - # bad prefix or suffix - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) from err - # cache - _offset_map[name] = offset - - return _offset_map[name] - - # --------------------------------------------------------------------- # Period codes