Skip to content

REF: implement Dtype.index_class #54511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pandas._libs import missing as libmissing
from pandas._libs.hashtable import object_hash
from pandas._libs.properties import cache_readonly
from pandas.errors import AbstractMethodError

from pandas.core.dtypes.generic import (
Expand All @@ -32,6 +33,7 @@
type_t,
)

from pandas import Index
from pandas.core.arrays import ExtensionArray

# To parameterize on same ExtensionDtype
Expand Down Expand Up @@ -406,6 +408,16 @@ def _is_immutable(self) -> bool:
"""
return False

@cache_readonly
def index_class(self) -> type_t[Index]:
    """
    The Index subclass to return from Index.__new__ when this dtype is
    encountered.

    Returns
    -------
    type[Index]
        The base ``Index`` class here; dtype subclasses may override this
        property to return a more specific Index subclass.
    """
    # Function-scope import. NOTE(review): presumably deferred to avoid a
    # circular import with the top-level pandas package -- confirm.
    from pandas import Index

    return Index


class StorageExtensionDtype(ExtensionDtype):
"""ExtensionDtype that may be backed by more than one implementation."""
Expand Down
28 changes: 28 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@

from pandas import (
Categorical,
CategoricalIndex,
DatetimeIndex,
Index,
IntervalIndex,
PeriodIndex,
)
from pandas.core.arrays import (
BaseMaskedArray,
Expand Down Expand Up @@ -671,6 +675,12 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:

return find_common_type(non_cat_dtypes)

@cache_readonly
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you make these properties like the base class?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to make both cache_readonly; I think the perf improvement in the OP depends on it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, and CI is green-ish.

def index_class(self) -> type_t[CategoricalIndex]:
    """
    The Index subclass to return from Index.__new__ when this dtype is
    encountered.

    Returns
    -------
    type[CategoricalIndex]
    """
    # Function-scope import. NOTE(review): presumably deferred to avoid a
    # circular import at module load -- confirm.
    from pandas import CategoricalIndex

    return CategoricalIndex


@register_extension_dtype
class DatetimeTZDtype(PandasExtensionDtype):
Expand Down Expand Up @@ -911,6 +921,12 @@ def __setstate__(self, state) -> None:
self._tz = state["tz"]
self._unit = state["unit"]

@cache_readonly
def index_class(self) -> type_t[DatetimeIndex]:
    """
    The Index subclass to return from Index.__new__ when this dtype is
    encountered.

    Returns
    -------
    type[DatetimeIndex]
    """
    # Function-scope import. NOTE(review): presumably deferred to avoid a
    # circular import at module load -- confirm.
    from pandas import DatetimeIndex

    return DatetimeIndex


@register_extension_dtype
class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
Expand Down Expand Up @@ -1121,6 +1137,12 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> PeriodArray:
return PeriodArray(np.array([], dtype="int64"), dtype=self, copy=False)
return PeriodArray._concat_same_type(results)

@cache_readonly
def index_class(self) -> type_t[PeriodIndex]:
    """
    The Index subclass to return from Index.__new__ when this dtype is
    encountered.

    Returns
    -------
    type[PeriodIndex]
    """
    # Function-scope import. NOTE(review): presumably deferred to avoid a
    # circular import at module load -- confirm.
    from pandas import PeriodIndex

    return PeriodIndex


@register_extension_dtype
class IntervalDtype(PandasExtensionDtype):
Expand Down Expand Up @@ -1384,6 +1406,12 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return np.dtype(object)
return IntervalDtype(common, closed=closed)

@cache_readonly
def index_class(self) -> type_t[IntervalIndex]:
    """
    The Index subclass to return from Index.__new__ when this dtype is
    encountered.

    Returns
    -------
    type[IntervalIndex]
    """
    # Function-scope import. NOTE(review): presumably deferred to avoid a
    # circular import at module load -- confirm.
    from pandas import IntervalIndex

    return IntervalIndex


class NumpyEADtype(ExtensionDtype):
"""
Expand Down
19 changes: 1 addition & 18 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,24 +594,7 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

if isinstance(dtype, ExtensionDtype):
if isinstance(dtype, DatetimeTZDtype):
from pandas import DatetimeIndex

return DatetimeIndex
elif isinstance(dtype, CategoricalDtype):
from pandas import CategoricalIndex

return CategoricalIndex
elif isinstance(dtype, IntervalDtype):
from pandas import IntervalIndex

return IntervalIndex
elif isinstance(dtype, PeriodDtype):
from pandas import PeriodIndex

return PeriodIndex

return Index
return dtype.index_class

if dtype.kind == "M":
from pandas import DatetimeIndex
Expand Down