Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
arr = [None]
table = pa.table({"arr": pa.array(arr, 'float64').dictionary_encode()})
exchange_df = table.__dataframe__()
result = pd.api.interchange.from_dataframe(exchange_df)
Issue Description
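Running the example above raises an `IndexError`: the traceback below shows `categorical_column_to_series` indexing into a `categories` array of size 0.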
IndexError Traceback (most recent call last)
Cell In[2], line 4
2 table = pa.table({"arr": pa.array(arr, 'float64').dictionary_encode()})
3 exchange_df = table.__dataframe__()
----> 4 result = pd.api.interchange.from_dataframe(exchange_df)
File ~/tmp/.311venv/lib/python3.11/site-packages/pandas/core/interchange/from_dataframe.py:54, in from_dataframe(df, allow_copy)
51 if not hasattr(df, "__dataframe__"):
52 raise ValueError("`df` does not support __dataframe__")
---> 54 return _from_dataframe(df.__dataframe__(allow_copy=allow_copy))
File ~/tmp/.311venv/lib/python3.11/site-packages/pandas/core/interchange/from_dataframe.py:75, in _from_dataframe(df, allow_copy)
73 pandas_dfs = []
74 for chunk in df.get_chunks():
---> 75 pandas_df = protocol_df_chunk_to_pandas(chunk)
76 pandas_dfs.append(pandas_df)
78 if not allow_copy and len(pandas_dfs) > 1:
File ~/tmp/.311venv/lib/python3.11/site-packages/pandas/core/interchange/from_dataframe.py:125, in protocol_df_chunk_to_pandas(df)
123 columns[name], buf = primitive_column_to_ndarray(col)
124 elif dtype == DtypeKind.CATEGORICAL:
--> 125 columns[name], buf = categorical_column_to_series(col)
126 elif dtype == DtypeKind.STRING:
127 columns[name], buf = string_column_to_ndarray(col)
File ~/tmp/.311venv/lib/python3.11/site-packages/pandas/core/interchange/from_dataframe.py:205, in categorical_column_to_series(col)
199 codes = buffer_to_ndarray(
200 codes_buff, codes_dtype, offset=col.offset, length=col.size()
201 )
203 # Doing module in order to not get ``IndexError`` for
204 # out-of-bounds sentinel values in `codes`
--> 205 values = categories[codes % len(categories)]
207 cat = pd.Categorical(
208 values, categories=categories, ordered=categorical["is_ordered"]
209 )
210 data = pd.Series(cat)
IndexError: index 0 is out of bounds for axis 0 with size 0
Expected Behavior
In [6]: result
Out[6]:
arr
0 NaN
Installed Versions
INSTALLED VERSIONS
commit : 042ebab
python : 3.11.3.final.0
python-bits : 64
OS : Linux
OS-release : 5.10.102.1-microsoft-standard-WSL2
Version : #1 SMP Wed Mar 2 00:30:59 UTC 2022
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_GB.UTF-8
LOCALE : en_GB.UTF-8
pandas : 2.1.0.dev0+696.g042ebab024
numpy : 1.24.1
pytz : 2022.7.1
dateutil : 2.8.2
setuptools : 65.6.3
pip : 22.3.1
Cython : 0.29.33
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : 8.8.0
pandas_datareader: None
bs4 : None
bottleneck : None
brotli : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 11.0.0
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.10.1
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : 2023.2.0
xlrd : None
zstandard : None
tzdata : 2023.3
qtpy : None
pyqt5 : None
None