Skip to content

Commit 93db57e

Browse files
BUG: Categorical[dt64tz].to_numpy() losing tz (#38136)
* BUG: Categorical[dt64tz].to_numpy() losing tz
* Update pandas/core/arrays/categorical.py
  Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
* Avoid FutureWarning
* whatsnew
Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
1 parent 3f51060 commit 93db57e

File tree

5 files changed

+48
-12
lines changed

5 files changed

+48
-12
lines changed

doc/source/whatsnew/v1.2.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ Categorical
533533
- Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
534534
- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
535535
- Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
536-
-
536+
- Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with timezone-aware ``datetime64`` categories incorrectly dropping the timezone information instead of casting to object dtype (:issue:`38136`)
537537

538538
Datetimelike
539539
^^^^^^^^^^^^

pandas/conftest.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,16 @@ def index_or_series(request):
320320
index_or_series2 = index_or_series
321321

322322

323+
@pytest.fixture(
324+
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
325+
)
326+
def index_or_series_or_array(request):
327+
"""
328+
Fixture to parametrize over Index, Series, and ExtensionArray
329+
"""
330+
return request.param
331+
332+
323333
@pytest.fixture
324334
def dict_subclass():
325335
"""

pandas/core/arrays/categorical.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,15 +1269,13 @@ def __array__(self, dtype=None) -> np.ndarray:
12691269
if dtype==None (default), the same dtype as
12701270
categorical.categories.dtype.
12711271
"""
1272-
ret = take_1d(self.categories.values, self._codes)
1272+
ret = take_1d(self.categories._values, self._codes)
12731273
if dtype and not is_dtype_equal(dtype, self.categories.dtype):
12741274
return np.asarray(ret, dtype)
1275-
if is_extension_array_dtype(ret):
1276-
# When we're a Categorical[ExtensionArray], like Interval,
1277-
# we need to ensure __array__ get's all the way to an
1278-
# ndarray.
1279-
ret = np.asarray(ret)
1280-
return ret
1275+
# When we're a Categorical[ExtensionArray], like Interval,
1276+
# we need to ensure __array__ gets all the way to an
1277+
# ndarray.
1278+
return np.asarray(ret)
12811279

12821280
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
12831281
# for binary ops, use our custom dunder methods

pandas/core/series.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,13 @@
8484
from pandas.core.generic import NDFrame
8585
from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple
8686
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
87-
from pandas.core.indexes.api import Float64Index, Index, MultiIndex, ensure_index
87+
from pandas.core.indexes.api import (
88+
CategoricalIndex,
89+
Float64Index,
90+
Index,
91+
MultiIndex,
92+
ensure_index,
93+
)
8894
import pandas.core.indexes.base as ibase
8995
from pandas.core.indexes.datetimes import DatetimeIndex
9096
from pandas.core.indexes.period import PeriodIndex
@@ -412,7 +418,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
412418
labels = ensure_index(labels)
413419

414420
if labels._is_all_dates:
415-
if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
421+
deep_labels = labels
422+
if isinstance(labels, CategoricalIndex):
423+
deep_labels = labels.categories
424+
425+
if not isinstance(
426+
deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
427+
):
416428
try:
417429
labels = DatetimeIndex(labels)
418430
# need to set here because we changed the index

pandas/tests/base/test_conversion.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,18 +316,34 @@ def test_array_multiindex_raises():
316316
TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"),
317317
np.array([0, 3600000000000], dtype="m8[ns]"),
318318
),
319+
# GH#26406 tz is preserved in Categorical[dt64tz]
320+
(
321+
pd.Categorical(pd.date_range("2016-01-01", periods=2, tz="US/Pacific")),
322+
np.array(
323+
[
324+
Timestamp("2016-01-01", tz="US/Pacific"),
325+
Timestamp("2016-01-02", tz="US/Pacific"),
326+
]
327+
),
328+
),
319329
],
320330
)
321-
def test_to_numpy(array, expected, index_or_series):
322-
box = index_or_series
331+
def test_to_numpy(array, expected, index_or_series_or_array):
332+
box = index_or_series_or_array
323333
thing = box(array)
324334

325335
if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index:
326336
pytest.skip(f"No index type for {array.dtype}")
327337

338+
if array.dtype.name == "int64" and box is pd.array:
339+
pytest.xfail("thing is Int64 and to_numpy() returns object")
340+
328341
result = thing.to_numpy()
329342
tm.assert_numpy_array_equal(result, expected)
330343

344+
result = np.asarray(thing)
345+
tm.assert_numpy_array_equal(result, expected)
346+
331347

332348
@pytest.mark.parametrize("as_series", [True, False])
333349
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)