diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index e9e1881e879da..2e8e2345d4c0a 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -27,6 +27,7 @@ Bug fixes - Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`) - Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`) - Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`) +- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on-categorical dtypes (:issue:`49889`) - Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`) - Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`) - Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with a :class:`ArrowDtype` (:issue:`52425`) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 065387e1acefd..2bbb678516968 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd -from pandas.core.interchange.column import PandasColumn from pandas.core.interchange.dataframe_protocol import ( Buffer, Column, @@ -181,9 +180,15 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]: raise NotImplementedError("Non-dictionary categoricals not supported yet") cat_column = categorical["categories"] - # for mypy/pyright - assert isinstance(cat_column, PandasColumn), "categories must be a PandasColumn" - categories = np.array(cat_column._col) + if hasattr(cat_column, "_col"): + # Item "Column" of "Optional[Column]" has no attribute "_col" + # Item "None" of "Optional[Column]" has no attribute "_col" + categories = np.array(cat_column._col) # type: ignore[union-attr] + else: + raise NotImplementedError( + "Interchanging categorical columns isn't supported yet, and our " + "fallback of using the `col._col` attribute (a ndarray) failed." + ) buffers = col.get_buffers() codes_buff, codes_dtype = buffers["data"] diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 078a17510d502..abdfb4e79cb20 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -74,6 +74,21 @@ def test_categorical_dtype(data): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) +def test_categorical_pyarrow(): + # GH 49889 + pa = pytest.importorskip("pyarrow", "11.0.0") + + arr = ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"] + table = pa.table({"weekday": pa.array(arr).dictionary_encode()}) + exchange_df = table.__dataframe__() + result = from_dataframe(exchange_df) + weekday = pd.Categorical( + arr, categories=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] + ) + expected = pd.DataFrame({"weekday": weekday}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "data", [int_data, uint_data, float_data, bool_data, datetime_data] )