Closed
Description
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
(optional) I have confirmed this bug exists on the master branch of pandas.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
Code Sample, a copy-pastable example
import pyarrow as pa
import pandas as pd
df = pd.DataFrame({"string": pd.array([], dtype="string")})
table = pa.Table.from_pandas(df)
print(table.column(0))
# <pyarrow.lib.ChunkedArray object at 0x12c5cec70>
# [
# []
# ]
# Construct a Table with zero chunks as returned by `pyarrow.dataset`
empty_table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
print(empty_table.column(0))
# <pyarrow.lib.ChunkedArray object at 0x12c5cee00>
# [
#
# ]
empty_table.to_pandas()
The final call, `empty_table.to_pandas()`, yields the following traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-5047304e95a2> in <module>
----> 1 empty_table.to_pandas()
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._PandasConvertible.to_pandas()
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/table.pxi in pyarrow.lib.Table._to_pandas()
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/pandas_compat.py in table_to_blockmanager(options, table, categories, ignore_metadata, types_mapper)
790 _check_data_column_metadata_consistency(all_columns)
791 columns = _deserialize_column_index(table, all_columns, column_indexes)
--> 792 blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
793
794 axes = [columns, index]
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/pandas_compat.py in _table_to_blocks(options, block_table, categories, extension_columns)
1131 result = pa.lib.table_to_blocks(options, block_table, categories,
1132 list(extension_columns.keys()))
-> 1133 return [_reconstruct_block(item, columns, extension_columns)
1134 for item in result]
1135
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/pandas_compat.py in <listcomp>(.0)
1131 result = pa.lib.table_to_blocks(options, block_table, categories,
1132 list(extension_columns.keys()))
-> 1133 return [_reconstruct_block(item, columns, extension_columns)
1134 for item in result]
1135
~/mambaforge/envs/fletcher/lib/python3.8/site-packages/pyarrow/pandas_compat.py in _reconstruct_block(item, columns, extension_columns)
749 raise ValueError("This column does not support to be converted "
750 "to a pandas ExtensionArray")
--> 751 pd_ext_arr = pandas_dtype.__from_arrow__(arr)
752 block = _int.make_block(pd_ext_arr, placement=placement,
753 klass=_int.ExtensionBlock)
~/Development/pandas/pandas/core/arrays/string_.py in __from_arrow__(self, array)
119 results.append(str_arr)
120
--> 121 return StringArray._concat_same_type(results)
122
123
~/Development/pandas/pandas/core/arrays/_mixins.py in _concat_same_type(cls, to_concat, axis)
240 dtypes = {str(x.dtype) for x in to_concat}
241 if len(dtypes) != 1:
--> 242 raise ValueError("to_concat must have the same dtype (tz)", dtypes)
243
244 new_values = [x._ndarray for x in to_concat]
ValueError: ('to_concat must have the same dtype (tz)', set())
Problem description
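The problem is that `_concat_same_type` checks that all of the arrays being concatenated share a single dtype.
When the ChunkedArray has zero chunks, `__from_arrow__` passes an empty list to `StringArray._concat_same_type`, so the set of dtypes is empty, the `len(dtypes) != 1` check fails, and a `ValueError` is raised. Since there is nothing to concatenate, this should instead return an empty `StringArray`.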