|
1 | 1 | import cython
|
2 | 2 |
|
3 | 3 | import operator
|
| 4 | +import re |
4 | 5 | import time
|
5 | 6 | from typing import Any
|
6 | 7 | import warnings
|
@@ -103,17 +104,6 @@ cdef bint is_tick_object(object obj):
|
103 | 104 | return isinstance(obj, Tick)
|
104 | 105 |
|
105 | 106 |
|
106 |
| -cdef to_offset(object obj): |
107 |
| - """ |
108 |
| - Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime |
109 |
| - imports |
110 |
| - """ |
111 |
| - if isinstance(obj, BaseOffset): |
112 |
| - return obj |
113 |
| - from pandas.tseries.frequencies import to_offset |
114 |
| - return to_offset(obj) |
115 |
| - |
116 |
| - |
117 | 107 | cdef datetime _as_datetime(datetime obj):
|
118 | 108 | if isinstance(obj, ABCTimestamp):
|
119 | 109 | return obj.to_pydatetime()
|
@@ -3505,6 +3495,9 @@ CBMonthEnd = CustomBusinessMonthEnd
|
3505 | 3495 | CBMonthBegin = CustomBusinessMonthBegin
|
3506 | 3496 | CDay = CustomBusinessDay
|
3507 | 3497 |
|
| 3498 | +# ---------------------------------------------------------------------- |
| 3499 | +# to_offset helpers |
| 3500 | + |
3508 | 3501 | prefix_mapping = {
|
3509 | 3502 | offset._prefix: offset
|
3510 | 3503 | for offset in [
|
@@ -3542,6 +3535,224 @@ prefix_mapping = {
|
3542 | 3535 | ]
|
3543 | 3536 | }
|
3544 | 3537 |
|
# Maps each Timedelta component field name to a unit offset of that size.
# to_offset() looks names up here when converting a timedelta into offsets.
_name_to_offset_map = {
    "days": Day(1),
    "hours": Hour(1),
    "minutes": Minute(1),
    "seconds": Second(1),
    "milliseconds": Milli(1),
    "microseconds": Micro(1),
    "nanoseconds": Nano(1),
}

# hack to handle WOM-1MON
# Group 1: optional sign followed by an integer or decimal stride (may be
#   empty).
# Group 2: alphabetic rule name with an optional "-suffix" (e.g. "W-SUN").
# Group 3: the "-suffix" part alone, nested inside group 2.
opattern = re.compile(
    r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)"
)

# Short aliases and the rule name each one is replaced with before the
# prefix lookup in _get_offset() / to_offset().
_lite_rule_alias = {
    "W": "W-SUN",
    "Q": "Q-DEC",

    "A": "A-DEC",  # YearEnd(month=12),
    "Y": "A-DEC",
    "AS": "AS-JAN",  # YearBegin(month=1),
    "YS": "AS-JAN",
    "BA": "BA-DEC",  # BYearEnd(month=12),
    "BY": "BA-DEC",
    "BAS": "BAS-JAN",  # BYearBegin(month=1),
    "BYS": "BAS-JAN",

    "Min": "T",
    "min": "T",
    "ms": "L",
    "us": "U",
    "ns": "N",
}

# Names that _get_offset() must not upper-case before resolving them.
_dont_uppercase = {"MS", "ms"}

# Template for the ValueError message raised on an unrecognised frequency.
INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"

# TODO: still needed?
# cache of previously seen offsets
# (filled by _get_offset(), keyed by the normalised rule name)
_offset_map = {}
| 3580 | + |
| 3581 | + |
cpdef base_and_stride(str freqstr):
    """
    Return base freq and stride info from string representation

    Parameters
    ----------
    freqstr : str
        Frequency string such as ``'5Min'``; it is matched against
        ``opattern`` from the start of the string.

    Returns
    -------
    base : str
    stride : int
        Defaults to 1 when ``freqstr`` carries no leading number.

    Raises
    ------
    ValueError
        If ``freqstr`` does not match ``opattern``, or if its stride is not
        an integer (e.g. ``'1.5H'`` or a bare sign such as ``'-H'``).

    Examples
    --------
    base_and_stride('5Min') -> 'Min', 5
    """
    match = opattern.match(freqstr)

    if match is None:
        raise ValueError(f"Could not evaluate {freqstr}")

    stride_text = match.group(1)

    if not stride_text:
        # no explicit multiple given, e.g. "Min"
        stride = 1
    else:
        try:
            stride = int(stride_text)
        except ValueError as err:
            # opattern also accepts decimal and sign-only strides, which
            # int() rejects; report them with the same message as any other
            # unparseable input instead of int()'s "invalid literal" text.
            raise ValueError(f"Could not evaluate {freqstr}") from err

    base = match.group(2)

    return base, stride
| 3610 | + |
| 3611 | + |
| 3612 | +# TODO: better name? |
def _get_offset(name: str) -> BaseOffset:
    """
    Return DateOffset object associated with rule name.

    The name is normalised first: unless it is listed in
    ``_dont_uppercase`` it is upper-cased, and it is then translated through
    ``_lite_rule_alias``. The part before the first ``"-"`` selects the
    offset class from ``prefix_mapping``; any remaining ``"-"``-separated
    parts are handed to that class's ``_from_name``. Results are cached in
    ``_offset_map`` under the normalised name.

    Raises
    ------
    ValueError
        If the prefix is unknown or the suffix is rejected by the class.
    """
    if name in _dont_uppercase:
        name = _lite_rule_alias.get(name, name)
    else:
        # try the alias table with the upper-cased spelling first, then with
        # the lower-cased spelling of whatever that produced
        upper_name = name.upper()
        aliased = _lite_rule_alias.get(upper_name, upper_name)
        name = _lite_rule_alias.get(aliased.lower(), aliased)

    if name in _offset_map:
        return _offset_map[name]

    try:
        parts = name.split("-")
        offset_cls = prefix_mapping[parts[0]]
        # with no suffix parts[1:] is empty; too many suffix parts make
        # _from_name raise TypeError, which is converted below
        result = offset_cls._from_name(*parts[1:])
    except (ValueError, TypeError, KeyError) as err:
        # bad prefix or suffix
        raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err

    # remember the offset for later lookups
    _offset_map[name] = result
    return result
| 3642 | + |
| 3643 | + |
cpdef to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None
        A tuple is read as ``(name, stride)`` or ``(stride, name)``; the
        string element is taken as the name.

    Returns
    -------
    DateOffset or None
        ``None`` is returned only when ``freq`` is ``None``; a DateOffset
        passed in is returned unchanged.

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset : Standard kind of date increment used for a date range.

    Examples
    --------
    >>> to_offset("5min")
    <5 * Minutes>

    >>> to_offset("1D1H")
    <25 * Hours>

    >>> to_offset(("W", 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, "B"))
    <2 * BusinessDays>

    >>> to_offset(pd.Timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, BaseOffset):
        return freq

    if isinstance(freq, tuple):
        try:
            name = freq[0]
            stride = freq[1]
        except IndexError as err:
            # a tuple with fewer than two elements is an invalid frequency;
            # surface it as the documented ValueError, not an IndexError
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err
        if isinstance(stride, str):
            # accept (stride, name) ordering as well
            name, stride = stride, name
        # only the base is used; a multiple embedded in the name is dropped
        name, _ = base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        from .timedeltas import Timedelta

        delta = None
        freq = Timedelta(freq)
        try:
            # sum one offset per non-zero component; if every component is
            # zero, delta stays None and the final check below raises
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except ValueError as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    else:
        # imported once per call rather than once per parsed component
        from .resolution import Resolution  # TODO: avoid runtime import

        delta = None
        stride_sign = None
        try:
            # opattern has three capture groups, so re.split yields
            # [separator, stride, name, suffix, separator, ...]: every
            # fourth entry starting at 0/1/2 is a separator/stride/name
            split = re.split(opattern, freq)
            if split[-1] != "" and not split[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = _lite_rule_alias.get(name) or name
                if stride_sign is None:
                    # the sign of the first component applies to all of them
                    stride_sign = -1 if stride.startswith("-") else 1
                if not stride:
                    stride = 1

                if prefix in Resolution.reso_str_bump_map:
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = _get_offset(name)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except (ValueError, TypeError) as err:
            raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err

    if delta is None:
        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))

    return delta
| 3755 | + |
3545 | 3756 |
|
3546 | 3757 | # ----------------------------------------------------------------------
|
3547 | 3758 | # RelativeDelta Arithmetic
|
|
0 commit comments