From f83487099f8388615c792263c98565962ee67b11 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Apr 2021 14:28:54 -0700 Subject: [PATCH 1/3] TYP: libperiod --- pandas/_libs/tslibs/period.pyi | 147 +++++++++++++++++++++++++++++ pandas/_libs/tslibs/period.pyx | 6 +- pandas/core/arrays/datetimelike.py | 4 +- pandas/core/arrays/period.py | 18 +++- 4 files changed, 166 insertions(+), 9 deletions(-) create mode 100644 pandas/_libs/tslibs/period.pyi diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi new file mode 100644 index 0000000000000..37fa9c3314419 --- /dev/null +++ b/pandas/_libs/tslibs/period.pyi @@ -0,0 +1,147 @@ +import numpy as np + +from pandas._libs.tslibs.nattype import NaTType +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._libs.tslibs.timestamps import Timestamp + +INVALID_FREQ_ERR_MSG: str +DIFFERENT_FREQ: str + +class IncompatibleFrequency(ValueError): ... + +def periodarr_to_dt64arr( + periodarr: np.ndarray, # const int64_t[:] + freq: int, +) -> np.ndarray: ... # np.ndarray[np.int64] + +def period_asfreq_arr( + arr: np.ndarray, # ndarray[int64_t] arr, + freq1: int, + freq2: int, + end: bool, +) -> np.ndarray: ... # np.ndarray[np.int64] + +def get_period_field_arr( + field: str, + arr: np.ndarray, # const int64_t[:] + freq: int, +) -> np.ndarray: ... # np.ndarray[np.int64] + +def from_ordinals( + values: np.ndarray, # const int64_t[:] + freq, +) -> np.ndarray: ... # np.ndarray[np.int64] + +def extract_ordinals( + values: np.ndarray, # np.ndarray[object] + freq, +) -> np.ndarray: ... # np.ndarray[np.int64] + +def extract_freq( + values: np.ndarray, # np.ndarray[object] +) -> BaseOffset: ... + +# exposed for tests +def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ... + +def period_ordinal( + y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int +) -> int: ... + +def freq_to_dtype_code(freq: BaseOffset) -> int: ... +def validate_end_alias(how: str) -> str: ... + +class Period: + ordinal: int # int64_t + freq: BaseOffset + + # error: "__new__" must return a class instance (got "Union[Period, NaTType]") + def __new__( # type: ignore[misc] + cls, + value=None, + freq=None, + ordinal=None, + year=None, + month=None, + quarter=None, + day=None, + hour=None, + minute=None, + second=None, + ) -> Period | NaTType: ... + + @classmethod + def _maybe_convert_freq(cls, freq) -> BaseOffset: ... + + @classmethod + def _from_ordinal(cls, ordinal: int, freq) -> Period: ... + + @classmethod + def now(cls, freq=...) -> Period: ... + + def strftime(self, fmt: str) -> str: ... + + def to_timestamp(self, freq=..., how=..., tz=...) -> Timestamp: ... + + def asfreq(self, freq, how=...) -> Period: ... + + @property + def freqstr(self) -> str: ... + + @property + def is_leap_year(self) -> bool: ... + + @property + def daysinmonth(self) -> int: ... + + @property + def days_in_month(self) -> int: ... + + @property + def qyear(self) -> int: ... + + @property + def quarter(self) -> int: ... + + @property + def day_of_year(self) -> int: ... + + @property + def weekday(self) -> int: ... + + @property + def day_of_week(self) -> int: ... + + @property + def week(self) -> int: ... + + @property + def weekofyear(self) -> int: ... + + @property + def second(self) -> int: ... + + @property + def minute(self) -> int: ... + + @property + def hour(self) -> int: ... + + @property + def day(self) -> int: ... + + @property + def month(self) -> int: ... + + @property + def year(self) -> int: ... + + @property + def end_time(self) -> Timestamp: ... + + @property + def start_time(self) -> Timestamp: ... + + def __sub__(self, other): ... + + def __add__(self, other) -> Period: ... diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 165f51d06af6d..e8b0aa9037f2f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1445,7 +1445,7 @@ def from_ordinals(const int64_t[:] values, freq): @cython.wraparound(False) @cython.boundscheck(False) -def extract_ordinals(ndarray[object] values, freq): +def extract_ordinals(ndarray[object] values, freq) -> np.ndarray: # TODO: Change type to const object[:] when Cython supports that. cdef: @@ -1483,7 +1483,7 @@ def extract_ordinals(ndarray[object] values, freq): return ordinals.base # .base to access underlying np.ndarray -def extract_freq(ndarray[object] values): +def extract_freq(ndarray[object] values) -> BaseOffset: # TODO: Change type to const object[:] when Cython supports that. cdef: @@ -2539,7 +2539,7 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, minute, second, 0, 0, base) -def validate_end_alias(how): +def validate_end_alias(how: str) -> str: how_dict = {'S': 'S', 'E': 'E', 'START': 'S', 'FINISH': 'E', 'BEGIN': 'S', 'END': 'E'} diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4a5dca348a8c0..1491a028b5a6c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -591,7 +591,9 @@ def _validate_shift_value(self, fill_value): # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: - new_fill = self._scalar_type(fill_value) + # error: Incompatible types in assignment (expression has type + # "Union[Period, Any, Timedelta]", variable has type "Period") + new_fill = self._scalar_type(fill_value) # type: ignore[assignment] # stacklevel here is chosen to be correct when called from # DataFrame.shift or Series.shift diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5a9dd0e89bd65..56d368ba0839c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -295,9 +295,17 @@ def _generate_range(cls, start, end, periods, freq, fields): # ----------------------------------------------------------------- # DatetimeLike Interface - def _unbox_scalar(self, value: Period | NaTType, setitem: bool = False) -> np.int64: + # error: Argument 1 of "_unbox_scalar" is incompatible with supertype + # "DatetimeLikeArrayMixin"; supertype defines the argument type as + # "Union[Union[Period, Any, Timedelta], NaTType]" + def _unbox_scalar( # type: ignore[override] + self, + value: Period | NaTType, + setitem: bool = False, + ) -> np.int64: if value is NaT: - return np.int64(value.value) + # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" + return np.int64(value.value) # type: ignore[union-attr] elif isinstance(value, self._scalar_type): self._check_compatible_with(value, setitem=setitem) return np.int64(value.ordinal) @@ -482,9 +490,9 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: freq = Period._maybe_convert_freq(freq) base = freq._period_dtype_code - new_data = self.asfreq(freq, how=how) + new_parr = self.asfreq(freq, how=how) - new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) + new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base) return DatetimeArray(new_data)._with_freq("infer") # -------------------------------------------------------------------- @@ -910,7 +918,7 @@ def raise_on_incompatible(left, right): def period_array( - data: Sequence[Period | None] | AnyArrayLike, + data: Sequence[Period | str | None] | AnyArrayLike, freq: str | Tick | None = None, copy: bool = False, ) -> PeriodArray: From a81761d1b0b0dffcf953958bb38ca745a0c371ba Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Apr 2021 15:55:46 -0700 Subject: [PATCH 2/3] update per comments --- pandas/_libs/tslibs/period.pyi | 21 ++++++++++++++++----- pandas/_libs/tslibs/period.pyx | 2 +- pandas/core/arrays/datetimelike.py | 6 +++--- pandas/core/resample.py | 3 ++- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi index 37fa9c3314419..49e630d605310 100644 --- a/pandas/_libs/tslibs/period.pyi +++ b/pandas/_libs/tslibs/period.pyi @@ -1,8 +1,14 @@ +from typing import Literal + import numpy as np from pandas._libs.tslibs.nattype import NaTType from pandas._libs.tslibs.offsets import BaseOffset from pandas._libs.tslibs.timestamps import Timestamp +from pandas._typing import ( + Frequency, + Timezone, +) INVALID_FREQ_ERR_MSG: str DIFFERENT_FREQ: str @@ -29,12 +35,12 @@ def get_period_field_arr( def from_ordinals( values: np.ndarray, # const int64_t[:] - freq, + freq: Frequency, ) -> np.ndarray: ... # np.ndarray[np.int64] def extract_ordinals( values: np.ndarray, # np.ndarray[object] - freq, + freq: Frequency | int, ) -> np.ndarray: ... # np.ndarray[np.int64] def extract_freq( @@ -49,7 +55,7 @@ def period_ordinal( ) -> int: ... def freq_to_dtype_code(freq: BaseOffset) -> int: ... -def validate_end_alias(how: str) -> str: ... +def validate_end_alias(how: str) -> Literal["E", "S"]: ... class Period: ordinal: int # int64_t @@ -81,7 +87,12 @@ class Period: def strftime(self, fmt: str) -> str: ... - def to_timestamp(self, freq=..., how=..., tz=...) -> Timestamp: ... + def to_timestamp( + self, + freq: str | BaseOffset | None =..., + how: str = ..., + tz: Timezone | None = ..., + ) -> Timestamp: ... def asfreq(self, freq, how=...) -> Period: ... @@ -142,6 +153,6 @@ class Period: @property def start_time(self) -> Timestamp: ... - def __sub__(self, other): ... + def __sub__(self, other) -> Period | BaseOffset: ... def __add__(self, other) -> Period: ... diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e8b0aa9037f2f..0bb431bc8e1cd 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2539,7 +2539,7 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, minute, second, 0, 0, base) -def validate_end_alias(how: str) -> str: +def validate_end_alias(how: str) -> str: # Literal["E", "S"] how_dict = {'S': 'S', 'E': 'E', 'START': 'S', 'FINISH': 'E', 'BEGIN': 'S', 'END': 'E'} diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1491a028b5a6c..dabeb7860f692 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -586,14 +586,14 @@ def _validate_shift_value(self, fill_value): elif isinstance(fill_value, self._recognized_scalars): fill_value = self._scalar_type(fill_value) else: + new_fill: DatetimeLikeScalar + # only warn if we're not going to raise if self._scalar_type is Period and lib.is_integer(fill_value): # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: - # error: Incompatible types in assignment (expression has type - # "Union[Period, Any, Timedelta]", variable has type "Period") - new_fill = self._scalar_type(fill_value) # type: ignore[assignment] + new_fill = self._scalar_type(fill_value) # stacklevel here is chosen to be correct when called from # DataFrame.shift or Series.shift diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 469325cf04189..af0b188a15b5f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1772,7 +1772,8 @@ def _get_period_bins(self, ax: PeriodIndex): # Get offset for bin edge (not label edge) adjustment start_offset = Period(start, self.freq) - Period(p_start, self.freq) - bin_shift = start_offset.n % freq_mult + # error: Item "Period" of "Union[Period, Any]" has no attribute "n" + bin_shift = start_offset.n % freq_mult # type: ignore[union-attr] start = p_start labels = binner = period_range( From fd2dc45cd5bb7eed01ed6a9a30d24842d4b5143d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 May 2021 07:19:54 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/resample.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 969ad1bd00165..aae6314968695 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1904,17 +1904,17 @@ def _get_period_range_edges( raise TypeError("'first' and 'last' must be instances of type Period") # GH 23882 - first = first.to_timestamp() - last = last.to_timestamp() - adjust_first = not freq.is_on_offset(first) - adjust_last = freq.is_on_offset(last) + first_ts = first.to_timestamp() + last_ts = last.to_timestamp() + adjust_first = not freq.is_on_offset(first_ts) + adjust_last = freq.is_on_offset(last_ts) - first, last = _get_timestamp_range_edges( - first, last, freq, closed=closed, origin=origin, offset=offset + first_ts, last_ts = _get_timestamp_range_edges( + first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset ) - first = (first + int(adjust_first) * freq).to_period(freq) - last = (last - int(adjust_last) * freq).to_period(freq) + first = (first_ts + int(adjust_first) * freq).to_period(freq) + last = (last_ts - int(adjust_last) * freq).to_period(freq) return first, last