From 86f2334c73951ee9c0bb5028d9defc64b007d5be Mon Sep 17 00:00:00 2001 From: Ashar Date: Fri, 7 Mar 2025 15:00:38 -0500 Subject: [PATCH 1/4] WIP: Support PyArrow JSON type in ArrowDtype.type --- pandas/core/dtypes/dtypes.py | 2 ++ pandas/tests/extension/test_arrow.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index a02a8b8b110bf..623b0fe58e29d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2267,6 +2267,8 @@ def type(self): return type(pa_type) elif isinstance(pa_type, pa.ExtensionType): return type(self)(pa_type.storage_type).type + elif isinstance(pa_type, pa.JsonType): + return str raise NotImplementedError(pa_type) @property diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7b7c2a632aba2..61da81da73875 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3553,3 +3553,11 @@ def test_categorical_from_arrow_dictionary(): dtype="int64", ) tm.assert_series_equal(result, expected) + + +def test_arrow_json_type(): + # GH 60958 + + dtype = pd.ArrowDtype(pa.json_(pa.string())) + result = pd.api.types.pandas_dtype(dtype).type + assert result == str From 11bb9a3e045fbc76943bfdf26f4bc3c65eca9f2c Mon Sep 17 00:00:00 2001 From: Ashar Date: Mon, 10 Mar 2025 15:54:59 -0400 Subject: [PATCH 2/4] ENH: Added support for PyArrow JSON type in ArrowDtype.type --- pandas/tests/extension/test_arrow.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 61da81da73875..9a9038d98f4a7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3557,7 +3557,6 @@ def test_categorical_from_arrow_dictionary(): def test_arrow_json_type(): # GH 60958 - - dtype = pd.ArrowDtype(pa.json_(pa.string())) - result = pd.api.types.pandas_dtype(dtype).type + dtype = ArrowDtype(pa.json_(pa.string())) + result = dtype.type assert result == str From 5e0f68a5c4e66bb3a1b4c17024bf0e836960d9e9 Mon Sep 17 00:00:00 2001 From: Ashar Date: Tue, 11 Mar 2025 13:29:01 -0400 Subject: [PATCH 3/4] ENH: Support PyArrow JSON type in ArrowDtype.type Added support for PyArrow's JSON extension type in ArrowDtype.type by mapping JsonType to str. Fixes #60958. --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5581bac2b2d42..50d1969c331fd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -65,6 +65,7 @@ Other enhancements - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) +- :meth: ``ArrowDtype.type`` now supports the pyarrow json data type (:issue:`60958`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) From a945971fbb31f746bd1dcea481895cfccf343174 Mon Sep 17 00:00:00 2001 From: Ashar Date: Tue, 11 Mar 2025 16:21:33 -0400 Subject: [PATCH 4/4] ENH: Support for PyArrow JSON type in ArrowDtype.type Improved extension type handling by using BaseExtensionType for consistent storage type resolution across all PyArrow extension types, including JSON. Fixes #60958 --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/dtypes/dtypes.py | 4 +--- pandas/tests/extension/test_arrow.py | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 50d1969c331fd..8232105851c52 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -61,11 +61,11 @@ Other enhancements - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) +- :class:`ArrowDtype` now supports ``pyarrow.JsonType`` (:issue:`60958`) - :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) -- :meth: ``ArrowDtype.type`` now supports the pyarrow json data type (:issue:`60958`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 623b0fe58e29d..570074e047da6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2265,10 +2265,8 @@ def type(self): elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? return type(pa_type) - elif isinstance(pa_type, pa.ExtensionType): + elif isinstance(pa_type, pa.BaseExtensionType): return type(self)(pa_type.storage_type).type - elif isinstance(pa_type, pa.JsonType): - return str raise NotImplementedError(pa_type) @property diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9a9038d98f4a7..fc5930ebcd8ac 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -42,6 +42,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, + pa_version_under19p0, pa_version_under20p0, ) @@ -3555,6 +3556,9 @@ def test_categorical_from_arrow_dictionary(): tm.assert_series_equal(result, expected) +@pytest.mark.skipif( + pa_version_under19p0, reason="pa.json_ was introduced in pyarrow v19.0" +) def test_arrow_json_type(): # GH 60958 dtype = ArrowDtype(pa.json_(pa.string()))