Description
When attempting to convert from Arrow to pandas using the `types_mapper` argument of `to_pandas`, I get the following error (a standalone sketch that reproduces the failure follows the traceback):
```
________________ test_list_rows_nullable_scalars_dtypes[None] ___________________

bigquery_client = <google.cloud.bigquery.client.Client object at 0x197b50a30>
scalars_table = 'swast-scratch.python_bigquery_tests_system_20211102220533_55dcac.scalars'
max_results = None

    @pytest.mark.parametrize(
        ("max_results",),
        (
            (None,),  # Use BQ Storage API.
            (10,),  # Use REST API.
        ),
    )
    def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results):
        # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
        # BigQuery Storage API and pyarrow.
        schema = [
            bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
            bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
            bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
            bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
            bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
            bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
            bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
            bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
            bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
            bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
            bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
            bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
        ]
>       df = bigquery_client.list_rows(
            scalars_table, max_results=max_results, selected_fields=schema,
        ).to_dataframe()

tests/system/test_pandas.py:1030:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

google/cloud/bigquery/table.py:1946: in to_dataframe
    df = record_batch.to_pandas(
pyarrow/array.pxi:757: in pyarrow.lib._PandasConvertible.to_pandas
    ???
pyarrow/table.pxi:1740: in pyarrow.lib.Table._to_pandas
    ???
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:789: in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:1130: in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:1130: in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

item = {'placement': array([3]), 'py_array': <pyarrow.lib.ChunkedArray object at 0x197c53db0>
[
  [
    2021-07-21,
    null
  ]
]}
columns = ['bool_col', 'bignumeric_col', 'bytes_col', 'date_col', 'datetime_col', 'float64_col', ...]
extension_columns = {'date_col': <class 'db_dtypes.DateDtype'>}

    def _reconstruct_block(item, columns=None, extension_columns=None):
        """
        Construct a pandas Block from the `item` dictionary coming from pyarrow's
        serialization or returned by arrow::python::ConvertTableToPandas.

        This function takes care of converting dictionary types to pandas
        categorical, Timestamp-with-timezones to the proper pandas Block, and
        conversion to pandas ExtensionBlock

        Parameters
        ----------
        item : dict
            For basic types, this is a dictionary in the form of
            {'block': np.ndarray of values, 'placement': pandas block placement}.
            Additional keys are present for other types (dictionary, timezone,
            object).
        columns :
            Column names of the table being constructed, used for extension types
        extension_columns : dict
            Dictionary of {column_name: pandas_dtype} that includes all columns
            and corresponding dtypes that will be converted to a pandas
            ExtensionBlock.

        Returns
        -------
        pandas Block
        """
        import pandas.core.internals as _int

        block_arr = item.get('block', None)
        placement = item['placement']
        if 'dictionary' in item:
            cat = _pandas_api.categorical_type.from_codes(
                block_arr, categories=item['dictionary'],
                ordered=item['ordered'])
            block = _int.make_block(cat, placement=placement)
        elif 'timezone' in item:
            dtype = make_datetimetz(item['timezone'])
            block = _int.make_block(block_arr, placement=placement,
                                    klass=_int.DatetimeTZBlock,
                                    dtype=dtype)
        elif 'object' in item:
            block = _int.make_block(builtin_pickle.loads(block_arr),
                                    placement=placement)
        elif 'py_array' in item:
            # create ExtensionBlock
            arr = item['py_array']
            assert len(placement) == 1
            name = columns[placement[0]]
            pandas_dtype = extension_columns[name]
            if not hasattr(pandas_dtype, '__from_arrow__'):
>               raise ValueError("This column does not support to be converted "
                                 "to a pandas ExtensionArray")
E               ValueError: This column does not support to be converted to a pandas ExtensionArray

/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:747: ValueError
================== short test summary info ================
```
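
For a standalone reproduction without BigQuery in the picture, here is a minimal sketch (my reconstruction, using a hypothetical `NoArrowSupportDtype`): in the pyarrow version shown above, `to_pandas` raises the same `ValueError` whenever the dtype returned by `types_mapper` does not implement `__from_arrow__`, which matches the `hasattr` check in `_reconstruct_block`.

```python
import datetime

import pandas as pd
import pyarrow as pa


class NoArrowSupportDtype(pd.api.extensions.ExtensionDtype):
    """Hypothetical dtype that deliberately omits __from_arrow__."""

    name = "no_arrow_support"
    type = datetime.date


table = pa.table({"date_col": pa.array([datetime.date(2021, 7, 21), None])})

# Map Arrow date32 columns to the dtype above; returning None for any
# other type falls back to the default conversion.
dtype_mapping = {pa.date32(): NoArrowSupportDtype()}
table.to_pandas(types_mapper=dtype_mapping.get)
# ValueError: This column does not support to be converted to a pandas
# ExtensionArray
```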
Context: https://github.com/googleapis/python-bigquery/pull/972/files#r741469631
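
For comparison, the conversion goes through the ExtensionBlock branch successfully when `types_mapper` returns dtypes that do implement the `__from_arrow__` hook, e.g. pandas' nullable dtypes (a sketch for illustration, not the fix from the linked PR):

```python
import pandas as pd
import pyarrow as pa

table = pa.table({
    "int64_col": pa.array([1, None], type=pa.int64()),
    "bool_col": pa.array([True, None], type=pa.bool_()),
})

# pd.Int64Dtype and pd.BooleanDtype implement __from_arrow__, so
# _reconstruct_block can build ExtensionBlocks for these columns.
mapping = {pa.int64(): pd.Int64Dtype(), pa.bool_(): pd.BooleanDtype()}
df = table.to_pandas(types_mapper=mapping.get)
print(df.dtypes)  # int64_col: Int64, bool_col: boolean
```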