Skip to content

need __from_arrow__ to support types_mapper #38

Closed
@tswast

Description

@tswast

When I attempt to convert from Arrow to Pandas using the types_mapper argument of to_pandas, I get the following error:

________________ test_list_rows_nullable_scalars_dtypes[None] ___________________

bigquery_client = <google.cloud.bigquery.client.Client object at 0x197b50a30>
scalars_table = 'swast-scratch.python_bigquery_tests_system_20211102220533_55dcac.scalars', max_results = None

    @pytest.mark.parametrize(
        ("max_results",), ((None,), (10,),)  # Use BQ Storage API.  # Use REST API.
    )
    def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results):
        # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
        # BigQuery Storage API and pyarrow.
        schema = [
            bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
            bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
            bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
            bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
            bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
            bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
            bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
            bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
            bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
            bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
            bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
            bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
        ]
    
>       df = bigquery_client.list_rows(
            scalars_table, max_results=max_results, selected_fields=schema,
        ).to_dataframe()

tests/system/test_pandas.py:1030: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/table.py:1946: in to_dataframe
    df = record_batch.to_pandas(
pyarrow/array.pxi:757: in pyarrow.lib._PandasConvertible.to_pandas
    ???
pyarrow/table.pxi:1740: in pyarrow.lib.Table._to_pandas
    ???
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:789: in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:1130: in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:1130: in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

item = {'placement': array([3]), 'py_array': <pyarrow.lib.ChunkedArray object at 0x197c53db0>
[
  [
    2021-07-21,
    null
  ]
]}
columns = ['bool_col', 'bignumeric_col', 'bytes_col', 'date_col', 'datetime_col', 'float64_col', ...]
extension_columns = {'date_col': <class 'db_dtypes.DateDtype'>}

    def _reconstruct_block(item, columns=None, extension_columns=None):
        """
        Construct a pandas Block from the `item` dictionary coming from pyarrow's
        serialization or returned by arrow::python::ConvertTableToPandas.
    
        This function takes care of converting dictionary types to pandas
        categorical, Timestamp-with-timezones to the proper pandas Block, and
        conversion to pandas ExtensionBlock
    
        Parameters
        ----------
        item : dict
            For basic types, this is a dictionary in the form of
            {'block': np.ndarray of values, 'placement': pandas block placement}.
            Additional keys are present for other types (dictionary, timezone,
            object).
        columns :
            Column names of the table being constructed, used for extension types
        extension_columns : dict
            Dictionary of {column_name: pandas_dtype} that includes all columns
            and corresponding dtypes that will be converted to a pandas
            ExtensionBlock.
    
        Returns
        -------
        pandas Block
    
        """
        import pandas.core.internals as _int
    
        block_arr = item.get('block', None)
        placement = item['placement']
        if 'dictionary' in item:
            cat = _pandas_api.categorical_type.from_codes(
                block_arr, categories=item['dictionary'],
                ordered=item['ordered'])
            block = _int.make_block(cat, placement=placement)
        elif 'timezone' in item:
            dtype = make_datetimetz(item['timezone'])
            block = _int.make_block(block_arr, placement=placement,
                                    klass=_int.DatetimeTZBlock,
                                    dtype=dtype)
        elif 'object' in item:
            block = _int.make_block(builtin_pickle.loads(block_arr),
                                    placement=placement)
        elif 'py_array' in item:
            # create ExtensionBlock
            arr = item['py_array']
            assert len(placement) == 1
            name = columns[placement[0]]
            pandas_dtype = extension_columns[name]
            if not hasattr(pandas_dtype, '__from_arrow__'):
>               raise ValueError("This column does not support to be converted "
                                 "to a pandas ExtensionArray")
E               ValueError: This column does not support to be converted to a pandas ExtensionArray

/usr/local/Caskroom/miniconda/base/envs/dev-3.9/lib/python3.9/site-packages/pyarrow/pandas_compat.py:747: ValueError
================== short test summary info ================

Context: https://github.com/googleapis/python-bigquery/pull/972/files#r741469631

Metadata

Metadata

Assignees

Labels

api: bigquery — Issues related to the googleapis/python-db-dtypes-pandas API.
type: feature request — ‘Nice-to-have’ improvement, new feature or different behavior or design.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions