From 3e88ed10314f6f935c6e82216e56e7feb0ae1d73 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 16 Sep 2022 23:00:16 -0400 Subject: [PATCH 1/5] WIP: TimedeltaIndex accessors --- pandas-stubs/core/indexes/accessors.pyi | 54 ++++++++++++++++++++---- pandas-stubs/core/indexes/datetimes.pyi | 2 +- pandas-stubs/core/indexes/timedeltas.pyi | 4 +- tests/test_timefuncs.py | 20 +++++++++ 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/pandas-stubs/core/indexes/accessors.pyi b/pandas-stubs/core/indexes/accessors.pyi index 64ac45d69..3da7ed8ee 100644 --- a/pandas-stubs/core/indexes/accessors.pyi +++ b/pandas-stubs/core/indexes/accessors.pyi @@ -13,6 +13,7 @@ from pandas import ( Index, PeriodIndex, Timedelta, + TimedeltaIndex, ) from pandas.core.accessor import PandasDelegate from pandas.core.arrays import ( @@ -156,6 +157,7 @@ _DTRoundingMethodReturnType = TypeVar( TimedeltaSeries, TimestampSeries, DatetimeIndex, + # TimedeltaIndex ) class _DatetimeRoundingMethods(Generic[_DTRoundingMethodReturnType]): @@ -278,23 +280,30 @@ class DatetimeProperties( def to_pydatetime(self) -> np.ndarray: ... def isocalendar(self) -> DataFrame: ... -class _TimedeltaPropertiesNoRounding: +_TDNoRoundingMethodReturnType = TypeVar( + "_TDNoRoundingMethodReturnType", Series[int], Index +) +_TDTotalSecondsReturnType = TypeVar("_TDTotalSecondsReturnType", Series[float], Index) + +class _TimedeltaPropertiesNoRounding( + Generic[_TDNoRoundingMethodReturnType, _TDTotalSecondsReturnType] +): def to_pytimedelta(self) -> np.ndarray: ... @property def components(self) -> DataFrame: ... @property - def days(self) -> Series[int]: ... + def days(self) -> _TDNoRoundingMethodReturnType: ... @property - def seconds(self) -> Series[int]: ... + def seconds(self) -> _TDNoRoundingMethodReturnType: ... @property - def microseconds(self) -> Series[int]: ... + def microseconds(self) -> _TDNoRoundingMethodReturnType: ... @property - def nanoseconds(self) -> Series[int]: ... - def total_seconds(self) -> Series[float]: ... + def nanoseconds(self) -> _TDNoRoundingMethodReturnType: ... + def total_seconds(self) -> _TDTotalSecondsReturnType: ... class TimedeltaProperties( Properties, - _TimedeltaPropertiesNoRounding, + _TimedeltaPropertiesNoRounding[Series[int], Series[float]], _DatetimeRoundingMethods[TimedeltaSeries], ): ... @@ -337,7 +346,7 @@ class CombinedDatetimelikeProperties( Series[str], PeriodSeries, ], - _TimedeltaPropertiesNoRounding, + _TimedeltaPropertiesNoRounding[Series[int], Series[float]], _PeriodProperties, ): def __new__(cls, data: Series): ... @@ -379,3 +388,32 @@ class DatetimeIndexProperties( def std( self, axis: int | None = ..., ddof: int = ..., skipna: bool = ... ) -> Timedelta: ... + +# For some reason, using TimedeltaIndex as an argument to _DatetimeRoundingMethods +# doesn't work for pyright. So we just make the rounding methods explicit here. +class TimedeltaIndexProperties( + Properties, + _TimedeltaPropertiesNoRounding[Index, Index], + # _DatetimeRoundingMethods[TimedeltaIndex], +): + def round( + self, + freq: str | BaseOffset | None, + ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., + nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] + | Timedelta = ..., + ) -> TimedeltaIndex: ... + def floor( + self, + freq: str | BaseOffset | None, + ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., + nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] + | Timedelta = ..., + ) -> TimedeltaIndex: ... + def ceil( + self, + freq: str | BaseOffset | None, + ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., + nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] + | Timedelta = ..., + ) -> TimedeltaIndex: ... diff --git a/pandas-stubs/core/indexes/datetimes.pyi b/pandas-stubs/core/indexes/datetimes.pyi index c880f495d..143970b40 100644 --- a/pandas-stubs/core/indexes/datetimes.pyi +++ b/pandas-stubs/core/indexes/datetimes.pyi @@ -5,12 +5,12 @@ import numpy as np from pandas import ( DataFrame, Timedelta, + TimedeltaIndex, Timestamp, ) from pandas.core.indexes.accessors import DatetimeIndexProperties from pandas.core.indexes.api import Float64Index from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin -from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.series import ( TimedeltaSeries, TimestampSeries, diff --git a/pandas-stubs/core/indexes/timedeltas.pyi b/pandas-stubs/core/indexes/timedeltas.pyi index 2483f1cbf..885b8648f 100644 --- a/pandas-stubs/core/indexes/timedeltas.pyi +++ b/pandas-stubs/core/indexes/timedeltas.pyi @@ -1,6 +1,6 @@ from typing import overload -from pandas.core.arrays.datetimelike import TimelikeOps +from pandas.core.indexes.accessors import TimedeltaIndexProperties from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.series import TimedeltaSeries @@ -11,7 +11,7 @@ from pandas._libs import ( ) from pandas._typing import num -class TimedeltaIndex(DatetimeTimedeltaMixin, TimelikeOps): +class TimedeltaIndex(DatetimeTimedeltaMixin, TimedeltaIndexProperties): def __new__( cls, data=..., diff --git a/tests/test_timefuncs.py b/tests/test_timefuncs.py index cd87eae61..8e5a39fea 100644 --- a/tests/test_timefuncs.py +++ b/tests/test_timefuncs.py @@ -436,6 +436,26 @@ def test_datetimeindex_accessors() -> None: check(assert_type(i0.is_normalized, bool), bool) +def test_timedeltaindex_accessors() -> None: + # GH 292 + i0 = pd.date_range("1/1/2021", "1/5/2021") - pd.Timestamp("1/3/2019") + check(assert_type(i0, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(i0.days, pd.Index), pd.Index, int) + check(assert_type(i0.seconds, pd.Index), pd.Index, int) + check(assert_type(i0.microseconds, pd.Index), pd.Index, int) + check(assert_type(i0.nanoseconds, pd.Index), pd.Index, int) + check(assert_type(i0.components, pd.DataFrame), pd.DataFrame) + check(assert_type(i0.to_pytimedelta(), np.ndarray), np.ndarray) + check(assert_type(i0.total_seconds(), pd.Index), pd.Index, float) + check( + assert_type(i0.round("D"), pd.TimedeltaIndex), pd.TimedeltaIndex, pd.Timedelta + ) + check( + assert_type(i0.floor("D"), pd.TimedeltaIndex), pd.TimedeltaIndex, pd.Timedelta + ) + check(assert_type(i0.ceil("D"), pd.TimedeltaIndex), pd.TimedeltaIndex, pd.Timedelta) + + def test_some_offsets() -> None: # GH 222 From 329486c05d7b21a8df557250b923b7a7b965eeaa Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 16 Sep 2022 23:06:34 -0400 Subject: [PATCH 2/5] remove dtl to make mypy happy --- pandas-stubs/core/arrays/datetimes.pyi | 8 ++++++-- pandas-stubs/core/arrays/period.pyi | 7 +++++-- pandas-stubs/core/arrays/timedeltas.pyi | 7 +++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/arrays/datetimes.pyi b/pandas-stubs/core/arrays/datetimes.pyi index e544d47be..9e1ccc09c 100644 --- a/pandas-stubs/core/arrays/datetimes.pyi +++ b/pandas-stubs/core/arrays/datetimes.pyi @@ -1,13 +1,17 @@ from datetime import tzinfo import numpy as np -from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.datetimelike import ( + DatelikeOps, + DatetimeLikeArrayMixin, + TimelikeOps, +) from pandas.core.dtypes.dtypes import DatetimeTZDtype as DatetimeTZDtype def tz_to_dtype(tz): ... -class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps): +class DatetimeArray(DatetimeLikeArrayMixin, TimelikeOps, DatelikeOps): __array_priority__: int = ... def __init__(self, values, dtype=..., freq=..., copy: bool = ...) -> None: ... # ignore in dtype() is from the pandas source diff --git a/pandas-stubs/core/arrays/period.pyi b/pandas-stubs/core/arrays/period.pyi index ea5328705..081baeb94 100644 --- a/pandas-stubs/core/arrays/period.pyi +++ b/pandas-stubs/core/arrays/period.pyi @@ -1,14 +1,17 @@ from typing import Sequence import numpy as np -from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.datetimelike import ( + DatelikeOps, + DatetimeLikeArrayMixin, +) from pandas._libs.tslibs import Timestamp from pandas._libs.tslibs.period import Period as Period from pandas.tseries.offsets import Tick as Tick -class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): +class PeriodArray(DatetimeLikeArrayMixin, DatelikeOps): __array_priority__: int = ... def __init__(self, values, freq=..., dtype=..., copy: bool = ...) -> None: ... def dtype(self): ... diff --git a/pandas-stubs/core/arrays/timedeltas.pyi b/pandas-stubs/core/arrays/timedeltas.pyi index 500cb4f57..89c31ad03 100644 --- a/pandas-stubs/core/arrays/timedeltas.pyi +++ b/pandas-stubs/core/arrays/timedeltas.pyi @@ -1,9 +1,12 @@ from datetime import timedelta from typing import Sequence -from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.datetimelike import ( + DatetimeLikeArrayMixin, + TimelikeOps, +) -class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): +class TimedeltaArray(DatetimeLikeArrayMixin, TimelikeOps): __array_priority__: int = ... @property def dtype(self): ... From 75a1d095c170913aed0e79824d0087f88ec58801 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 16 Sep 2022 23:24:28 -0400 Subject: [PATCH 3/5] demonstrate possible bug --- pandas-stubs/core/indexes/accessors.pyi | 49 +++++++++++++------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/pandas-stubs/core/indexes/accessors.pyi b/pandas-stubs/core/indexes/accessors.pyi index 3da7ed8ee..81b9a2458 100644 --- a/pandas-stubs/core/indexes/accessors.pyi +++ b/pandas-stubs/core/indexes/accessors.pyi @@ -157,7 +157,7 @@ _DTRoundingMethodReturnType = TypeVar( TimedeltaSeries, TimestampSeries, DatetimeIndex, - # TimedeltaIndex + TimedeltaIndex, ) class _DatetimeRoundingMethods(Generic[_DTRoundingMethodReturnType]): @@ -394,26 +394,27 @@ class DatetimeIndexProperties( class TimedeltaIndexProperties( Properties, _TimedeltaPropertiesNoRounding[Index, Index], - # _DatetimeRoundingMethods[TimedeltaIndex], -): - def round( - self, - freq: str | BaseOffset | None, - ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., - nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] - | Timedelta = ..., - ) -> TimedeltaIndex: ... - def floor( - self, - freq: str | BaseOffset | None, - ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., - nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] - | Timedelta = ..., - ) -> TimedeltaIndex: ... - def ceil( - self, - freq: str | BaseOffset | None, - ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., - nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] - | Timedelta = ..., - ) -> TimedeltaIndex: ... + _DatetimeRoundingMethods[TimedeltaIndex], +): ... + +# def round( +# self, +# freq: str | BaseOffset | None, +# ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., +# nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] +# | Timedelta = ..., +# ) -> TimedeltaIndex: ... +# def floor( +# self, +# freq: str | BaseOffset | None, +# ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., +# nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] +# | Timedelta = ..., +# ) -> TimedeltaIndex: ... +# def ceil( +# self, +# freq: str | BaseOffset | None, +# ambiguous: Literal["raise", "infer", "NaT"] | np_ndarray_bool = ..., +# nonexistent: Literal["shift_forward", "shift_backward", "NaT", "raise"] +# | Timedelta = ..., +# ) -> TimedeltaIndex: ... From f8006760a7c64c65285c3f18b9e020f7d0a22971 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 30 Sep 2022 20:20:06 -0400 Subject: [PATCH 4/5] fix series loc on multiindex scalars --- pandas-stubs/core/series.pyi | 14 ++++++++------ tests/test_series.py | 9 ++++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 6184fe833..7c7582983 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -129,6 +129,13 @@ class _iLocIndexerSeries(_iLocIndexer, Generic[S1]): ) -> None: ... class _LocIndexerSeries(_LocIndexer, Generic[S1]): + # ignore needed because of mypy. Overlapping, but we want to distinguish + # having a tuple of just scalars, versus tuples that include slices or Index + @overload + def __getitem__( # type: ignore[misc] + self, + idx: Scalar | tuple[Scalar, ...], + ) -> S1: ... @overload def __getitem__( self, @@ -137,14 +144,9 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): | Sequence[float] | list[str] | slice - | tuple[str | float | slice | Index, ...], + | tuple[Scalar | slice | Index, ...], ) -> Series[S1]: ... @overload - def __getitem__( - self, - idx: str | float, - ) -> S1: ... - @overload def __setitem__( self, idx: Index | MaskType, diff --git a/tests/test_series.py b/tests/test_series.py index 75061d886..c5c1fc556 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -120,9 +120,12 @@ def test_types_loc_at() -> None: def test_multiindex_loc() -> None: - s = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_product([[1, 2], ["a", "b"]])) - check(assert_type(s.loc[1, :], pd.Series), pd.Series) - check(assert_type(s.loc[pd.Index([1]), :], pd.Series), pd.Series) + s = pd.Series( + [1, 2, 3, 4], index=pd.MultiIndex.from_product([[1, 2], ["a", "b"]]), dtype=int + ) + check(assert_type(s.loc[1, :], "pd.Series[int]"), pd.Series, int) + check(assert_type(s.loc[pd.Index([1]), :], "pd.Series[int]"), pd.Series, int) + check(assert_type(s.loc[1, "a"], int), np.int_) def test_types_boolean_indexing() -> None: From e7d31e6d66a5c92419cef07240508221c1c18524 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 1 Oct 2022 22:31:15 -0400 Subject: [PATCH 5/5] use IndexSliceTuple, add comments --- pandas-stubs/core/series.pyi | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index afeb38fb1..85c5de1b8 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -48,6 +48,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import ( _AtIndexer, _iAtIndexer, + _IndexSliceTuple, ) from pandas.core.resample import Resampler from pandas.core.strings import StringMethods @@ -137,16 +138,14 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): def __getitem__( # type: ignore[misc] self, idx: Scalar | tuple[Scalar, ...], + # tuple case is for getting a specific element when using a MultiIndex ) -> S1: ... @overload def __getitem__( self, - idx: MaskType - | Index - | Sequence[float] - | list[str] - | slice - | tuple[Scalar | slice | Index, ...], + idx: MaskType | Index | Sequence[float] | list[str] | slice | _IndexSliceTuple, + # _IndexSliceTuple is when having a tuple that includes a slice. Could just + # be s.loc[1, :], or s.loc[pd.IndexSlice[1, :]] ) -> Series[S1]: ... @overload def __setitem__(