Skip to content

TYP: libperiod #40990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions pandas/_libs/tslibs/period.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from typing import Literal

import numpy as np

from pandas._libs.tslibs.nattype import NaTType
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._typing import (
Frequency,
Timezone,
)

INVALID_FREQ_ERR_MSG: str
DIFFERENT_FREQ: str

# Stub declaration of an exception type that subclasses ValueError.
class IncompatibleFrequency(ValueError): ...

# Type stub. Per the inline notes, `periodarr` is consumed as an int64 buffer
# (`const int64_t[:]`) and the returned ndarray holds int64 values; `freq` is
# typed as a plain int.
def periodarr_to_dt64arr(
periodarr: np.ndarray, # const int64_t[:]
freq: int,
) -> np.ndarray: ... # np.ndarray[np.int64]

# Type stub. Per the inline notes, `arr` is an int64 ndarray and the result is
# an int64 ndarray. `freq1` and `freq2` are typed as ints and `end` as a bool.
# NOTE(review): presumably freq1 is the source and freq2 the target frequency
# code -- confirm against the Cython implementation.
def period_asfreq_arr(
arr: np.ndarray, # ndarray[int64_t] arr,
freq1: int,
freq2: int,
end: bool,
) -> np.ndarray: ... # np.ndarray[np.int64]

def get_period_field_arr(
field: str,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need an alias to a Literal for this, to match the accessor type in the Cython code?

arr: np.ndarray, # const int64_t[:]
freq: int,
) -> np.ndarray: ... # np.ndarray[np.int64]

# Type stub. Per the inline notes, `values` is consumed as an int64 buffer
# (`const int64_t[:]`) and the result is an int64 ndarray. `freq` is typed as
# `Frequency` (imported from pandas._typing), not as an int.
def from_ordinals(
values: np.ndarray, # const int64_t[:]
freq: Frequency,
) -> np.ndarray: ... # np.ndarray[np.int64]

# Type stub. Per the inline notes, `values` is an object-dtype ndarray and the
# result is an int64 ndarray. `freq` accepts either a `Frequency` or an int.
def extract_ordinals(
values: np.ndarray, # np.ndarray[object]
freq: Frequency | int,
) -> np.ndarray: ... # np.ndarray[np.int64]

# Type stub. Takes an object-dtype ndarray (per the inline note) and is
# declared to return a BaseOffset.
def extract_freq(
values: np.ndarray, # np.ndarray[object]
) -> BaseOffset: ...

# exposed for tests
# Type stub for the scalar form: a single int `ordinal` in, a single int out.
# `freq1` and `freq2` are typed as ints and `end` as a bool.
def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...

# Type stub. Every argument, including `freq`, is typed as an int, and the
# result is an int.
# NOTE(review): presumably y/m/d/h/min/s/us/ps are year, month, day, hour,
# minute, second, microsecond and picosecond -- confirm against the Cython
# implementation.
# NOTE(review): the parameter name `min` shadows the builtin; it is part of
# the declared signature, so it is left unchanged here.
def period_ordinal(
y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int
) -> int: ...

# Type stub: takes a BaseOffset and is declared to return an int.
def freq_to_dtype_code(freq: BaseOffset) -> int: ...
# Type stub: takes a str and is declared to return exactly "E" or "S".
def validate_end_alias(how: str) -> Literal["E", "S"]: ...

# Type stub for the Period class: two data attributes, the constructor,
# alternate constructors, conversion methods, read-only properties and the
# `+` / `-` operators. All bodies are `...` placeholders.
class Period:
ordinal: int # int64_t
freq: BaseOffset

# The constructor is declared to return either a Period or NaTType. The
# checker reports the error quoted below for that, so it is silenced with an
# ignore. None of the constructor arguments are annotated.
# error: "__new__" must return a class instance (got "Union[Period, NaTType]")
def __new__( # type: ignore[misc]
cls,
value=None,
freq=None,
ordinal=None,
year=None,
month=None,
quarter=None,
day=None,
hour=None,
minute=None,
second=None,
) -> Period | NaTType: ...

# Classmethod declared to return a BaseOffset; `freq` is unannotated.
@classmethod
def _maybe_convert_freq(cls, freq) -> BaseOffset: ...

# Alternate constructor taking an int ordinal; `freq` is unannotated.
@classmethod
def _from_ordinal(cls, ordinal: int, freq) -> Period: ...

# Alternate constructor; `freq` has a default that the stub elides with `...`.
@classmethod
def now(cls, freq=...) -> Period: ...

# Takes a str `fmt` and returns a str.
def strftime(self, fmt: str) -> str: ...

# Declared to return a Timestamp. All three arguments have defaults that the
# stub elides with `...`; `freq` and `tz` also accept None.
def to_timestamp(
self,
freq: str | BaseOffset | None =...,
how: str = ...,
tz: Timezone | None = ...,
) -> Timestamp: ...

# Declared to return a Period; neither argument is annotated.
def asfreq(self, freq, how=...) -> Period: ...

# Read-only property typed as str.
@property
def freqstr(self) -> str: ...

# Read-only property typed as bool.
@property
def is_leap_year(self) -> bool: ...

# The remaining field properties below are all read-only and typed as int.
# Both `daysinmonth` and `days_in_month` are declared.
@property
def daysinmonth(self) -> int: ...

@property
def days_in_month(self) -> int: ...

@property
def qyear(self) -> int: ...

@property
def quarter(self) -> int: ...

@property
def day_of_year(self) -> int: ...

# Both `weekday` and `day_of_week` are declared.
@property
def weekday(self) -> int: ...

@property
def day_of_week(self) -> int: ...

# Both `week` and `weekofyear` are declared.
@property
def week(self) -> int: ...

@property
def weekofyear(self) -> int: ...

@property
def second(self) -> int: ...

@property
def minute(self) -> int: ...

@property
def hour(self) -> int: ...

@property
def day(self) -> int: ...

@property
def month(self) -> int: ...

@property
def year(self) -> int: ...

# Read-only properties typed as Timestamp.
@property
def end_time(self) -> Timestamp: ...

@property
def start_time(self) -> Timestamp: ...

# Subtraction is declared to return either a Period or a BaseOffset; `other`
# is unannotated, so callers that need one specific type must narrow it.
def __sub__(self, other) -> Period | BaseOffset: ...

# Addition is declared to return a Period; `other` is unannotated.
def __add__(self, other) -> Period: ...
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1445,7 +1445,7 @@ def from_ordinals(const int64_t[:] values, freq):

@cython.wraparound(False)
@cython.boundscheck(False)
def extract_ordinals(ndarray[object] values, freq):
def extract_ordinals(ndarray[object] values, freq) -> np.ndarray:
# TODO: Change type to const object[:] when Cython supports that.

cdef:
Expand Down Expand Up @@ -1483,7 +1483,7 @@ def extract_ordinals(ndarray[object] values, freq):
return ordinals.base # .base to access underlying np.ndarray


def extract_freq(ndarray[object] values):
def extract_freq(ndarray[object] values) -> BaseOffset:
# TODO: Change type to const object[:] when Cython supports that.

cdef:
Expand Down Expand Up @@ -2539,7 +2539,7 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day,
minute, second, 0, 0, base)


def validate_end_alias(how):
def validate_end_alias(how: str) -> str: # Literal["E", "S"]
how_dict = {'S': 'S', 'E': 'E',
'START': 'S', 'FINISH': 'E',
'BEGIN': 'S', 'END': 'E'}
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,8 @@ def _validate_shift_value(self, fill_value):
elif isinstance(fill_value, self._recognized_scalars):
fill_value = self._scalar_type(fill_value)
else:
new_fill: DatetimeLikeScalar

# only warn if we're not going to raise
if self._scalar_type is Period and lib.is_integer(fill_value):
# kludge for #31971 since Period(integer) tries to cast to str
Expand Down
18 changes: 13 additions & 5 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,17 @@ def _generate_range(cls, start, end, periods, freq, fields):
# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value: Period | NaTType, setitem: bool = False) -> np.int64:
# error: Argument 1 of "_unbox_scalar" is incompatible with supertype
# "DatetimeLikeArrayMixin"; supertype defines the argument type as
# "Union[Union[Period, Any, Timedelta], NaTType]"
def _unbox_scalar( # type: ignore[override]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think DatetimeLikeArrayMixin needs to be Generic[DatetimeLikeScalarT].

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying this, I get `error: Free type variable expected in Generic[...] [misc]`.

I'll troubleshoot this a bit

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok to leave as a 'fix later' ignore so we can start benefitting straight away from the added types

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good

self,
value: Period | NaTType,
setitem: bool = False,
) -> np.int64:
if value is NaT:
return np.int64(value.value)
# error: Item "Period" of "Union[Period, NaTType]" has no attribute "value"
return np.int64(value.value) # type: ignore[union-attr]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a false positive. If you are not planning to 'fix later', you can either cast or add a reference to a mypy issue.

maybe python/mypy#7642, although AFAICT the discussion there is more around a singleton class (i.e. final, not subclassed) rather than a singleton instance.

however, from python/mypy#7642 (comment)

You're trying to introduce a singleton value other than None, and you want the type checker to understand code that excludes the singleton value.

I think this is a reasonable request.

so probably an appropriate issue to discuss this.

elif isinstance(value, self._scalar_type):
self._check_compatible_with(value, setitem=setitem)
return np.int64(value.ordinal)
Expand Down Expand Up @@ -482,9 +490,9 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
freq = Period._maybe_convert_freq(freq)
base = freq._period_dtype_code

new_data = self.asfreq(freq, how=how)
new_parr = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
return DatetimeArray(new_data)._with_freq("infer")

# --------------------------------------------------------------------
Expand Down Expand Up @@ -910,7 +918,7 @@ def raise_on_incompatible(left, right):


def period_array(
data: Sequence[Period | None] | AnyArrayLike,
data: Sequence[Period | str | None] | AnyArrayLike,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And, from the docstring, `pd.NaT` (`np.nan` also seems to be acceptable, but not `pd.NA`) and also `datetime.datetime`:

>>> period_array(
...     [pd.Period("2017", freq="A"), pd.Period("2018", freq="A"), datetime.datetime.now()]
... )
<PeriodArray>
['2017', '2018', '2021']
Length: 3, dtype: period[A-DEC]
>>> 

freq: str | Tick | None = None,
copy: bool = False,
) -> PeriodArray:
Expand Down
19 changes: 10 additions & 9 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,8 @@ def _get_period_bins(self, ax: PeriodIndex):

# Get offset for bin edge (not label edge) adjustment
start_offset = Period(start, self.freq) - Period(p_start, self.freq)
bin_shift = start_offset.n % freq_mult
# error: Item "Period" of "Union[Period, Any]" has no attribute "n"
bin_shift = start_offset.n % freq_mult # type: ignore[union-attr]
start = p_start

labels = binner = period_range(
Expand Down Expand Up @@ -1903,17 +1904,17 @@ def _get_period_range_edges(
raise TypeError("'first' and 'last' must be instances of type Period")

# GH 23882
first = first.to_timestamp()
last = last.to_timestamp()
adjust_first = not freq.is_on_offset(first)
adjust_last = freq.is_on_offset(last)
first_ts = first.to_timestamp()
last_ts = last.to_timestamp()
adjust_first = not freq.is_on_offset(first_ts)
adjust_last = freq.is_on_offset(last_ts)

first, last = _get_timestamp_range_edges(
first, last, freq, closed=closed, origin=origin, offset=offset
first_ts, last_ts = _get_timestamp_range_edges(
first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset
)

first = (first + int(adjust_first) * freq).to_period(freq)
last = (last - int(adjust_last) * freq).to_period(freq)
first = (first_ts + int(adjust_first) * freq).to_period(freq)
last = (last_ts - int(adjust_last) * freq).to_period(freq)
return first, last


Expand Down