From 950c36b8dd8c705007153d3ce86d964dc6c65a58 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sat, 1 Oct 2022 22:03:41 +0100 Subject: [PATCH 1/3] ENH: Improve interval_range and IntervalIndex --- pandas-stubs/core/indexes/interval.pyi | 72 ++++++++++++++++++++------ 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/pandas-stubs/core/indexes/interval.pyi b/pandas-stubs/core/indexes/interval.pyi index e442b47e1..47f970a50 100644 --- a/pandas-stubs/core/indexes/interval.pyi +++ b/pandas-stubs/core/indexes/interval.pyi @@ -1,15 +1,25 @@ -from typing import Hashable +import datetime as dt +from typing import ( + Any, + Hashable, + Sequence, +) import numpy as np +from pandas import Index from pandas.core.indexes.extension import ExtensionIndex from pandas._libs.interval import ( Interval as Interval, IntervalMixin as IntervalMixin, ) +from pandas._libs.tslibs import BaseOffset from pandas._typing import ( + DatetimeLike, DtypeArg, IntervalClosedType, + Label, + npt, ) from pandas.core.dtypes.dtypes import IntervalDtype as IntervalDtype @@ -28,7 +38,11 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @classmethod def from_breaks( cls, - breaks, + breaks: Sequence[int] + | Sequence[float] + | Sequence[DatetimeLike] + | npt.NDArray[np.int_] + | npt.NDArray[np.float_], closed: IntervalClosedType = ..., name: Hashable = ..., copy: bool = ..., @@ -37,8 +51,16 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @classmethod def from_arrays( cls, - left, - right, + left: Sequence[int] + | Sequence[float] + | Sequence[DatetimeLike] + | npt.NDArray[np.int_] + | npt.NDArray[np.float_], + right: Sequence[int] + | Sequence[float] + | Sequence[DatetimeLike] + | npt.NDArray[np.int_] + | npt.NDArray[np.float_], closed: IntervalClosedType = ..., name: Hashable = ..., copy: bool = ..., @@ -47,37 +69,57 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @classmethod def from_tuples( cls, - data, + data: Sequence[tuple[int, int]] + | Sequence[tuple[float, 
float]] + | Sequence[tuple[DatetimeLike, DatetimeLike]] + | npt.NDArray, closed: IntervalClosedType = ..., name: Hashable = ..., copy: bool = ..., dtype: IntervalDtype | None = ..., ) -> IntervalIndex: ... + def __contains__(self, key: Any) -> bool: ... def astype(self, dtype: DtypeArg, copy: bool = ...) -> IntervalIndex: ... @property def inferred_type(self) -> str: ... def memory_usage(self, deep: bool = ...) -> int: ... + def is_monotonic_decreasing(self) -> bool: ... + def is_unique(self) -> bool: ... @property def is_overlapping(self) -> bool: ... - def get_loc(self, key, tolerance=...) -> int | slice | np.ndarray: ... + # Note: tolerance removed as it has no effect + def get_loc( + self, + key: Label, + method: str | None = ..., + ) -> int | slice | np.ndarray: ... def get_indexer( self, - targetArrayLike, + target: Index, method: str | None = ..., limit: int | None = ..., tolerance=..., ) -> np.ndarray: ... def get_indexer_non_unique( - self, targetArrayLike - ) -> tuple[np.ndarray, np.ndarray]: ... + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + @property + def left(self) -> Index: ... + @property + def right(self) -> Index: ... + @property + def mid(self) -> Index: ... + @property + def length(self) -> Index: ... def get_value(self, series: ABCSeries, key): ... @property def is_all_dates(self) -> bool: ... - def __lt__(self, other): ... - def __le__(self, other): ... - def __gt__(self, other): ... - def __ge__(self, other): ... def interval_range( - start=..., end=..., periods=..., freq=..., name=..., closed: str = ... -): ... + start: int | float | DatetimeLike | None = ..., + end: int | float | DatetimeLike | None = ..., + periods: int | None = ..., + freq: int | str | BaseOffset | None = ..., + name: Hashable = ..., + closed: IntervalClosedType = ..., +) -> IntervalIndex: ... 
From 543b7e771ac14611fcf0a111b89e3177e697cf9a Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 2 Oct 2022 08:01:26 +0100 Subject: [PATCH 2/3] TYP: Improve typing accuracy --- pandas-stubs/core/indexes/base.pyi | 8 +++- pandas-stubs/core/indexes/interval.pyi | 55 +++++++++++++++----------- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index 186bed95c..b9b823f75 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -28,6 +28,7 @@ from pandas._typing import ( Dtype, DtypeArg, DtypeObj, + FillnaOptions, HashableT, IndexT, Label, @@ -155,7 +156,12 @@ class Index(IndexOpsMixin, PandasObject): def symmetric_difference( self, other: list[T1] | Index, result_name=..., sort=... ) -> Index: ... - def get_loc(self, key, tolerance=...): ... + def get_loc( + self, + key: Label, + method: FillnaOptions | Literal["nearest"] | None = ..., + tolerance=..., + ): ... def get_indexer(self, target, method=..., limit=..., tolerance=...): ... def reindex(self, target, method=..., level=..., limit=..., tolerance=...): ... 
def join( diff --git a/pandas-stubs/core/indexes/interval.pyi b/pandas-stubs/core/indexes/interval.pyi index 47f970a50..da29b4f6b 100644 --- a/pandas-stubs/core/indexes/interval.pyi +++ b/pandas-stubs/core/indexes/interval.pyi @@ -2,21 +2,26 @@ import datetime as dt from typing import ( Any, Hashable, + Literal, Sequence, + Union, ) import numpy as np +import pandas as pd from pandas import Index from pandas.core.indexes.extension import ExtensionIndex +from typing_extensions import TypeAlias from pandas._libs.interval import ( Interval as Interval, IntervalMixin as IntervalMixin, ) -from pandas._libs.tslibs import BaseOffset +from pandas._libs.tslibs.offsets import DateOffset from pandas._typing import ( DatetimeLike, DtypeArg, + FillnaOptions, IntervalClosedType, Label, npt, @@ -25,6 +30,20 @@ from pandas._typing import ( from pandas.core.dtypes.dtypes import IntervalDtype as IntervalDtype from pandas.core.dtypes.generic import ABCSeries +_Edges: TypeAlias = Union[ + Sequence[int], + Sequence[float], + Sequence[DatetimeLike], + npt.NDArray[np.int_], + npt.NDArray[np.float_], + npt.NDArray[np.datetime64], + pd.Series[int], + pd.Series[float], + pd.Series[pd.Timestamp], + pd.Int64Index, + pd.DatetimeIndex, +] + class IntervalIndex(IntervalMixin, ExtensionIndex): def __new__( cls, @@ -38,11 +57,7 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @classmethod def from_breaks( cls, - breaks: Sequence[int] - | Sequence[float] - | Sequence[DatetimeLike] - | npt.NDArray[np.int_] - | npt.NDArray[np.float_], + breaks: _Edges, closed: IntervalClosedType = ..., name: Hashable = ..., copy: bool = ..., @@ -51,16 +66,8 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @classmethod def from_arrays( cls, - left: Sequence[int] - | Sequence[float] - | Sequence[DatetimeLike] - | npt.NDArray[np.int_] - | npt.NDArray[np.float_], - right: Sequence[int] - | Sequence[float] - | Sequence[DatetimeLike] - | npt.NDArray[np.int_] - | npt.NDArray[np.float_], + left: _Edges, + right: 
_Edges, closed: IntervalClosedType = ..., name: Hashable = ..., copy: bool = ..., @@ -83,23 +90,23 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @property def inferred_type(self) -> str: ... def memory_usage(self, deep: bool = ...) -> int: ... - def is_monotonic_decreasing(self) -> bool: ... - def is_unique(self) -> bool: ... @property def is_overlapping(self) -> bool: ... - # Note: tolerance removed as it has no effect + # Note: tolerance has no effect. It is included in all get_loc so + # that signatures are consistent with base even though it is usually not used def get_loc( self, key: Label, - method: str | None = ..., - ) -> int | slice | np.ndarray: ... + method: FillnaOptions | Literal["nearest"] | None = ..., + tolerance=..., + ) -> int | slice | npt.NDArray[np.bool_]: ... def get_indexer( self, target: Index, - method: str | None = ..., + method: FillnaOptions | Literal["nearest"] | None = ..., limit: int | None = ..., tolerance=..., - ) -> np.ndarray: ... + ) -> npt.NDArray[np.intp]: ... def get_indexer_non_unique( self, target: Index ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... @@ -119,7 +126,7 @@ def interval_range( start: int | float | DatetimeLike | None = ..., end: int | float | DatetimeLike | None = ..., periods: int | None = ..., - freq: int | str | BaseOffset | None = ..., + freq: int | str | DateOffset | None = ..., name: Hashable = ..., closed: IntervalClosedType = ..., ) -> IntervalIndex: ... 
From 644384328ed3eed2bdb687f9d90f17abc54e9587 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 2 Oct 2022 23:27:39 +0100 Subject: [PATCH 3/3] TST: Add tests for IntervalIndex and interval_range --- pandas-stubs/_typing.pyi | 4 +- pandas-stubs/core/indexes/interval.pyi | 17 +- tests/test_indexes.py | 293 +++++++++++++++++++++++++ 3 files changed, 308 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2dee6ad3a..a14b9647d 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -44,9 +44,7 @@ PandasScalar: TypeAlias = Union[ ] # Scalar: TypeAlias = Union[PythonScalar, PandasScalar] -DatetimeLike: TypeAlias = Union[ - datetime.date, datetime.datetime, np.datetime64, Timestamp -] +DatetimeLike: TypeAlias = Union[datetime.datetime, np.datetime64, Timestamp] # dtypes NpDtype: TypeAlias = Union[ diff --git a/pandas-stubs/core/indexes/interval.pyi b/pandas-stubs/core/indexes/interval.pyi index da29b4f6b..339f99053 100644 --- a/pandas-stubs/core/indexes/interval.pyi +++ b/pandas-stubs/core/indexes/interval.pyi @@ -5,6 +5,7 @@ from typing import ( Literal, Sequence, Union, + overload, ) import numpy as np @@ -122,11 +123,21 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): @property def is_all_dates(self) -> bool: ... +@overload def interval_range( - start: int | float | DatetimeLike | None = ..., - end: int | float | DatetimeLike | None = ..., + start: int | float | None = ..., + end: int | float | None = ..., periods: int | None = ..., - freq: int | str | DateOffset | None = ..., + freq: int | None = ..., + name: Hashable = ..., + closed: IntervalClosedType = ..., +) -> IntervalIndex: ... +@overload +def interval_range( + start: DatetimeLike | None = ..., + end: DatetimeLike | None = ..., + periods: int | None = ..., + freq: str | DateOffset | None = ..., name: Hashable = ..., closed: IntervalClosedType = ..., ) -> IntervalIndex: ... 
diff --git a/tests/test_indexes.py b/tests/test_indexes.py index b426f620d..c8dca4de6 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime as dt from typing import Union import numpy as np @@ -180,3 +181,295 @@ def test_range_index_union(): ), pd.Index, ) + + +def test_interval_range(): + check(assert_type(pd.interval_range(0, 10), pd.IntervalIndex), pd.IntervalIndex) + check( + assert_type( + pd.interval_range(0, 10, name="something", closed="both"), pd.IntervalIndex + ), + pd.IntervalIndex, + ) + check(assert_type(pd.interval_range(0.0, 10), pd.IntervalIndex), pd.IntervalIndex) + check( + assert_type( + pd.interval_range(dt.datetime(2000, 1, 1), dt.datetime(2010, 1, 1), 5), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.interval_range( + np.datetime64("2000-01-01"), np.datetime64("2020-01-01"), 5 + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.interval_range(pd.Timestamp(2000, 1, 1), pd.Timestamp(2010, 1, 1), 5), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.interval_range( + pd.Timestamp(2000, 1, 1), pd.Timestamp(2010, 1, 1), freq="1M" + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.interval_range( + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2010, 1, 1), + freq=pd.DateOffset(months=2), + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.interval_range(pd.Timestamp(2000, 1, 1), dt.datetime(2010, 1, 1), 5), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + + +def test_interval_index_breaks(): + check( + assert_type(pd.IntervalIndex.from_breaks([1, 2, 3, 4]), pd.IntervalIndex), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks([1.0, 2.0, 3.0, 4.0]), pd.IntervalIndex + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks(np.array([1, 2, 3, 4])), pd.IntervalIndex + ), + 
pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks(np.array([1.0, 2.0, 3.0, 4.0])), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks( + np.array( + [ + np.datetime64("2000-01-01"), + np.datetime64("2001-01-01"), + np.datetime64("2002-01-01"), + np.datetime64("2003-01-01"), + ] + ) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks(pd.Series([1, 2, 3, 4])), pd.IntervalIndex + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks(pd.Series([1.0, 2.0, 3.0, 4.0])), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks( + pd.Series( + [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2001, 1, 1), + pd.Timestamp(2002, 1, 1), + pd.Timestamp(2003, 1, 1), + ] + ) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_breaks( + [ + dt.datetime(2000, 1, 1), + dt.datetime(2001, 1, 1), + dt.datetime(2002, 1, 1), + dt.datetime(2003, 1, 1), + ] + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + + +def test_interval_index_arrays(): + check( + assert_type( + pd.IntervalIndex.from_arrays([1, 2, 3, 4], [2, 3, 4, 5]), pd.IntervalIndex + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays([1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0]), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + np.array([1, 2, 3, 4]), np.array([2, 3, 4, 5]) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + np.array([1.0, 2.0, 3.0, 4.0]), np.array([2.0, 3.0, 4.0, 5.0]) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + np.array( + [ + np.datetime64("2000-01-01"), + np.datetime64("2001-01-01"), + np.datetime64("2002-01-01"), + 
np.datetime64("2003-01-01"), + ] + ), + np.array( + [ + np.datetime64("2001-01-01"), + np.datetime64("2002-01-01"), + np.datetime64("2003-01-01"), + np.datetime64("2004-01-01"), + ] + ), + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + + check( + assert_type( + pd.IntervalIndex.from_arrays( + pd.Series([1, 2, 3, 4]), pd.Series([2, 3, 4, 5]) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + pd.Series([1.0, 2.0, 3.0, 4.0]), pd.Series([2.0, 3.0, 4.0, 5.0]) + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + pd.Series( + [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2001, 1, 1), + pd.Timestamp(2002, 1, 1), + pd.Timestamp(2003, 1, 1), + ] + ), + pd.Series( + [ + pd.Timestamp(2001, 1, 1), + pd.Timestamp(2002, 1, 1), + pd.Timestamp(2003, 1, 1), + pd.Timestamp(2004, 1, 1), + ] + ), + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_arrays( + [ + dt.datetime(2000, 1, 1), + dt.datetime(2001, 1, 1), + dt.datetime(2002, 1, 1), + dt.datetime(2003, 1, 1), + ], + [ + dt.datetime(2001, 1, 1), + dt.datetime(2002, 1, 1), + dt.datetime(2003, 1, 1), + dt.datetime(2004, 1, 1), + ], + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + ) + + +def test_interval_index_tuples(): + check( + assert_type(pd.IntervalIndex.from_tuples([(1, 2), (2, 3)]), pd.IntervalIndex), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_tuples([(1.0, 2.0), (2.0, 3.0)]), pd.IntervalIndex + ), + pd.IntervalIndex, + ) + check( + assert_type( + pd.IntervalIndex.from_tuples( + [ + (pd.Timestamp(2000, 1, 1), pd.Timestamp(2001, 1, 1)), + (pd.Timestamp(2001, 1, 1), pd.Timestamp(2002, 1, 1)), + ] + ), + pd.IntervalIndex, + ), + pd.IntervalIndex, + )