Skip to content

Commit 6f137b3

Browse files
committed
updates
1 parent: ee5c97d; commit: 6f137b3

File tree

2 files changed

12 additions and 37 deletions (+12 / -37 lines changed)

2 files changed

12 additions and 37 deletions (+12 / -37 lines changed)

pandas/core/dtypes/dtypes.py

Lines changed: 11 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -121,26 +121,6 @@ class CategoricalDtype(ExtensionDtype):
121121
categories : list or None
122122
ordered : bool, default False
123123
124-
Notes
125-
-----
126-
An instance of ``CategoricalDtype`` compares equal with any other
127-
instance of ``CategoricalDtype``, regardless of categories or ordered.
128-
In addition they compare equal to the string ``'category'``.
129-
130-
To check whether two instances of a ``CategoricalDtype`` exactly,
131-
use the ``is`` operator.
132-
133-
>>> t1 = CategoricalDtype(['a', 'b'], ordered=True)
134-
>>> t2 = CategoricalDtype(['a', 'c'], ordered=False)
135-
>>> t1 == t2
136-
True
137-
>>> t1 == 'category'
138-
True
139-
>>> t1 is t2
140-
False
141-
>>> t1 is CategoricalDtype(['a', 'b'], ordered=True)
142-
True
143-
144124
Examples
145125
--------
146126
>>> t = CategoricalDtype(categories=['b', 'a'], ordered=True)
@@ -160,17 +140,16 @@ class CategoricalDtype(ExtensionDtype):
160140
kind = 'O'
161141
str = '|O08'
162142
base = np.dtype('O')
163-
_metadata = []
143+
_metadata = ['categories', 'ordered']
164144
_cache = weakref.WeakValueDictionary()
165145

166146
def __new__(cls, categories=None, ordered=False, fastpath=False):
167147
from pandas.core.indexes.base import Index
168148
if categories is not None:
169149
categories = Index(categories)
170-
# Can just inline _validate_* if needed
150+
# validation
171151
cls._validate_categories(categories, fastpath=fastpath)
172152
cls._validate_ordered(ordered)
173-
174153
# We have a choice when hashing pandas unordered categoricals
175154
# We can completely ignore the order, or not. I.e. should
176155
# [a, b, c] hash the same as [b, a, c], when both are unordered?
@@ -184,7 +163,15 @@ def __new__(cls, categories=None, ordered=False, fastpath=False):
184163
hashed = cls._hash_categories(categories, ordered=True)
185164
else:
186165
hashed = None
187-
return cls._get_or_create(categories, ordered, hashed)
166+
167+
try:
168+
return cls._cache[(hashed, ordered)]
169+
except KeyError:
170+
categorical = object.__new__(cls)
171+
categorical._categories = categories
172+
categorical._ordered = ordered
173+
cls._cache[(hashed, ordered)] = categorical
174+
return categorical
188175

189176
def __hash__(self):
190177
# _hash_categories returns a uint64, so use the negative
@@ -237,18 +224,6 @@ def _hash_categories(categories, ordered=True):
237224
hashed = np.bitwise_xor.reduce(hashed)
238225
return hashed
239226

240-
@classmethod
241-
def _get_or_create(cls, categories, ordered, hashed):
242-
243-
try:
244-
return cls._cache[(hashed, ordered)]
245-
except KeyError:
246-
categorical = object.__new__(cls)
247-
categorical._categories = categories
248-
categorical._ordered = ordered
249-
cls._cache[(hashed, ordered)] = categorical
250-
return categorical
251-
252227
@classmethod
253228
def construct_from_string(cls, string):
254229
""" attempt to construct this type from a string, raise a TypeError if

pandas/core/util/hashing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
260260

261261
# For categoricals, we hash the categories, then remap the codes to the
262262
# hash values. (This check is above the complex check so that we don't ask
263-
# numpy if categorical is a subdtype of complex, as it will choke.
263+
# numpy if categorical is a subdtype of complex, as it will choke).
264264
if is_categorical_dtype(dtype):
265265
return _hash_categorical(vals, encoding, hash_key)
266266

0 commit comments

Comments
 (0)