21
21
import pytz
22
22
23
23
from pandas ._libs .interval import Interval
24
+ from pandas ._libs .properties import cache_readonly
24
25
from pandas ._libs .tslibs import (
25
26
BaseOffset ,
26
27
NaT ,
@@ -86,7 +87,7 @@ class PandasExtensionDtype(ExtensionDtype):
86
87
base : Optional [DtypeObj ] = None
87
88
isbuiltin = 0
88
89
isnative = 0
89
- _cache : Dict [str_type , PandasExtensionDtype ] = {}
90
+ _cache_dtypes : Dict [str_type , PandasExtensionDtype ] = {}
90
91
91
92
def __str__ (self ) -> str_type :
92
93
"""
@@ -110,7 +111,7 @@ def __getstate__(self) -> Dict[str_type, Any]:
110
111
@classmethod
111
112
def reset_cache (cls ) -> None :
112
113
""" clear the cache """
# The cache attribute is rebound on `cls` to a brand-new empty dict; the
# previously referenced dict object is not emptied in place.
113
- cls ._cache = {}
114
+ cls ._cache_dtypes = {}
114
115
115
116
116
117
class CategoricalDtypeType (type ):
@@ -182,7 +183,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
182
183
str = "|O08"
183
184
base = np .dtype ("O" )
184
185
_metadata = ("categories" , "ordered" )
185
- _cache : Dict [str_type , PandasExtensionDtype ] = {}
186
+ _cache_dtypes : Dict [str_type , PandasExtensionDtype ] = {}
186
187
187
188
def __init__ (self , categories = None , ordered : Ordered = False ):
188
189
self ._finalize (categories , ordered , fastpath = False )
@@ -351,17 +352,6 @@ def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
351
352
self ._categories = state .pop ("categories" , None )
352
353
self ._ordered = state .pop ("ordered" , False )
353
354
354
- def __hash__ (self ) -> int :
355
- # _hash_categories returns a uint64, so use the negative
356
- # space for when we have unknown categories to avoid a conflict
357
- if self .categories is None :
358
- if self .ordered :
359
- return - 1
360
- else :
361
- return - 2
362
- # We *do* want to include the real self.ordered here
363
- return int (self ._hash_categories (self .categories , self .ordered ))
364
-
365
355
def __eq__ (self , other : Any ) -> bool :
366
356
"""
367
357
Rules for CDT equality:
@@ -434,14 +424,28 @@ def __repr__(self) -> str_type:
434
424
data = data .rstrip (", " )
435
425
return f"CategoricalDtype(categories={ data } , ordered={ self .ordered } )"
436
426
437
- @staticmethod
438
- def _hash_categories (categories , ordered : Ordered = True ) -> int :
427
# Hash for the dtype: fixed negative sentinels when categories are unknown,
# otherwise the integer value of _hash_categories.
+ def __hash__ (self ) -> int :
428
+ # _hash_categories returns a uint64, so use the negative
429
+ # space for when we have unknown categories to avoid a conflict
430
+ if self .categories is None :
# -1 is returned for ordered, -2 for unordered, when categories is None.
431
+ if self .ordered :
432
+ return - 1
433
+ else :
434
+ return - 2
# _hash_categories is read as an attribute here, not called: it is defined
# just below under @cache_readonly (presumably a cached property - confirm
# against pandas._libs.properties).
435
+ return int (self ._hash_categories )
436
+
437
+ @cache_readonly
438
+ def _hash_categories (self ) -> int :
439
439
from pandas .core .util .hashing import (
440
440
combine_hash_arrays ,
441
441
hash_array ,
442
442
hash_tuples ,
443
443
)
444
444
445
+ # We *do* want to include the real self.ordered here
446
+ categories = self .categories
447
+ ordered = self .ordered
448
+
445
449
if len (categories ) and isinstance (categories [0 ], tuple ):
446
450
# assumes if any individual category is a tuple, then all are. ATM
447
451
# I don't really want to support just some of the categories being
@@ -678,7 +682,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
678
682
na_value = NaT
679
683
_metadata = ("unit" , "tz" )
680
684
_match = re .compile (r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]" )
681
- _cache : Dict [str_type , PandasExtensionDtype ] = {}
685
+ _cache_dtypes : Dict [str_type , PandasExtensionDtype ] = {}
682
686
683
687
def __init__ (self , unit : Union [str_type , DatetimeTZDtype ] = "ns" , tz = None ):
684
688
if isinstance (unit , DatetimeTZDtype ):
@@ -844,7 +848,7 @@ class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype):
844
848
num = 102
845
849
_metadata = ("freq" ,)
846
850
_match = re .compile (r"(P|p)eriod\[(?P<freq>.+)\]" )
847
- _cache : Dict [str_type , PandasExtensionDtype ] = {}
851
+ _cache_dtypes : Dict [str_type , PandasExtensionDtype ] = {}
848
852
849
853
def __new__ (cls , freq = None ):
850
854
"""
@@ -866,12 +870,12 @@ def __new__(cls, freq=None):
866
870
freq = cls ._parse_dtype_strict (freq )
867
871
868
872
try :
869
- return cls ._cache [freq .freqstr ]
873
+ return cls ._cache_dtypes [freq .freqstr ]
870
874
except KeyError :
871
875
dtype_code = freq ._period_dtype_code
872
876
u = dtypes .PeriodDtypeBase .__new__ (cls , dtype_code )
873
877
u ._freq = freq
874
- cls ._cache [freq .freqstr ] = u
878
+ cls ._cache_dtypes [freq .freqstr ] = u
875
879
return u
876
880
877
881
def __reduce__ (self ):
@@ -1049,7 +1053,7 @@ class IntervalDtype(PandasExtensionDtype):
1049
1053
_match = re .compile (
1050
1054
r"(I|i)nterval\[(?P<subtype>[^,]+)(, (?P<closed>(right|left|both|neither)))?\]"
1051
1055
)
1052
- _cache : Dict [str_type , PandasExtensionDtype ] = {}
1056
+ _cache_dtypes : Dict [str_type , PandasExtensionDtype ] = {}
1053
1057
1054
1058
def __new__ (cls , subtype = None , closed : Optional [str_type ] = None ):
1055
1059
from pandas .core .dtypes .common import (
@@ -1106,12 +1110,12 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None):
1106
1110
1107
1111
key = str (subtype ) + str (closed )
1108
1112
try :
1109
- return cls ._cache [key ]
1113
+ return cls ._cache_dtypes [key ]
1110
1114
except KeyError :
1111
1115
u = object .__new__ (cls )
1112
1116
u ._subtype = subtype
1113
1117
u ._closed = closed
1114
- cls ._cache [key ] = u
1118
+ cls ._cache_dtypes [key ] = u
1115
1119
return u
1116
1120
1117
1121
@property
0 commit comments