Skip to content

Commit 0cdaa71

Browse files
phofl and mroeschke authored
ENH: Add dtype of categories to repr of CategoricalDtype (#52202)
* ENH: Add dtype of categories to repr of CategoricalDtype
* Fix
* Update doc/source/whatsnew/v2.1.0.rst

  Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
* Fix docs

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 7aab3b4 commit 0cdaa71

File tree

6 files changed

+35
-11
lines changed

6 files changed

+35
-11
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Other enhancements
3838
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
3939
- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
4040
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
41+
- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
4142

4243
.. ---------------------------------------------------------------------------
4344
.. _whatsnew_210.notable_bug_fixes:

pandas/core/dtypes/dtypes.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,11 +253,11 @@ def _from_values_or_dtype(
253253
Examples
254254
--------
255255
>>> pd.CategoricalDtype._from_values_or_dtype()
256-
CategoricalDtype(categories=None, ordered=None)
256+
CategoricalDtype(categories=None, ordered=None, categories_dtype=None)
257257
>>> pd.CategoricalDtype._from_values_or_dtype(
258258
... categories=['a', 'b'], ordered=True
259259
... )
260-
CategoricalDtype(categories=['a', 'b'], ordered=True)
260+
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
261261
>>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True)
262262
>>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False)
263263
>>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True)
@@ -272,7 +272,7 @@ def _from_values_or_dtype(
272272
The supplied dtype takes precedence over values' dtype:
273273
274274
>>> pd.CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
275-
CategoricalDtype(categories=['x', 'y'], ordered=False)
275+
CategoricalDtype(categories=['x', 'y'], ordered=False, categories_dtype=object)
276276
"""
277277

278278
if dtype is not None:
@@ -429,13 +429,19 @@ def __eq__(self, other: Any) -> bool:
429429
def __repr__(self) -> str_type:
430430
if self.categories is None:
431431
data = "None"
432+
dtype = "None"
432433
else:
433434
data = self.categories._format_data(name=type(self).__name__)
434435
if data is None:
435436
# self.categories is RangeIndex
436437
data = str(self.categories._range)
437438
data = data.rstrip(", ")
438-
return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
439+
dtype = self.categories.dtype
440+
441+
return (
442+
f"CategoricalDtype(categories={data}, ordered={self.ordered}, "
443+
f"categories_dtype={dtype})"
444+
)
439445

440446
@cache_readonly
441447
def _hash_categories(self) -> int:

pandas/io/json/_table_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
183183
... "ordered": True,
184184
... }
185185
... )
186-
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
186+
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=object)
187187
188188
>>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
189189
'datetime64[ns]'

pandas/tests/dtypes/test_dtypes.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,10 @@ def test_repr_range_categories(self):
211211
dtype = CategoricalDtype(categories=rng, ordered=False)
212212
result = repr(dtype)
213213

214-
expected = "CategoricalDtype(categories=range(0, 3), ordered=False)"
214+
expected = (
215+
"CategoricalDtype(categories=range(0, 3), ordered=False, "
216+
"categories_dtype=int64)"
217+
)
215218
assert result == expected
216219

217220
def test_update_dtype(self):
@@ -220,6 +223,15 @@ def test_update_dtype(self):
220223
expected = CategoricalDtype(["b"], ordered=True)
221224
assert result == expected
222225

226+
def test_repr(self):
227+
cat = Categorical(pd.Index([1, 2, 3], dtype="int32"))
228+
result = cat.dtype.__repr__()
229+
expected = (
230+
"CategoricalDtype(categories=[1, 2, 3], ordered=False, "
231+
"categories_dtype=int32)"
232+
)
233+
assert result == expected
234+
223235

224236
class TestDatetimeTZDtype(Base):
225237
@pytest.fixture
@@ -980,7 +992,10 @@ def test_str_vs_repr(self, ordered):
980992
c1 = CategoricalDtype(["a", "b"], ordered=ordered)
981993
assert str(c1) == "category"
982994
# Py2 will have unicode prefixes
983-
pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
995+
pat = (
996+
r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
997+
r"categories_dtype=object\)"
998+
)
984999
assert re.match(pat.format(ordered=ordered), repr(c1))
9851000

9861001
def test_categorical_categories(self):

pandas/tests/util/test_assert_index_equal.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,10 @@ def test_index_equal_category_mismatch(check_categorical):
209209
msg = """Index are different
210210
211211
Attribute "dtype" are different
212-
\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\)
212+
\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False, \
213+
categories_dtype=object\\)
213214
\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \
214-
ordered=False\\)"""
215+
ordered=False, categories_dtype=object\\)"""
215216

216217
idx1 = Index(Categorical(["a", "b"]))
217218
idx2 = Index(Categorical(["a", "b"], categories=["a", "b", "c"]))

pandas/tests/util/test_assert_series_equal.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,10 @@ def test_series_equal_categorical_mismatch(check_categorical):
250250
msg = """Attributes of Series are different
251251
252252
Attribute "dtype" are different
253-
\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\)
253+
\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False, \
254+
categories_dtype=object\\)
254255
\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \
255-
ordered=False\\)"""
256+
ordered=False, categories_dtype=object\\)"""
256257

257258
s1 = Series(Categorical(["a", "b"]))
258259
s2 = Series(Categorical(["a", "b"], categories=list("abc")))

0 commit comments

Comments
 (0)