From 7dcbc6a524f3ddde932d272ac73c93630345e0bb Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:35:14 +0000 Subject: [PATCH 1/4] support nullable integers in from_dataframe --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/core/interchange/column.py | 7 ++++++- pandas/tests/interchange/test_impl.py | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 19b7e3493f964..90170196470dd 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -29,6 +29,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for Nullable integers (:issue:`55069`) - Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 508cd74c57288..649b6f77892fe 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -15,7 +15,10 @@ ) import pandas as pd -from pandas.api.types import is_string_dtype +from pandas.api.types import ( + is_extension_array_dtype, + is_string_dtype, +) from pandas.core.interchange.buffer import PandasBuffer from pandas.core.interchange.dataframe_protocol import ( Column, @@ -143,6 +146,8 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]: byteorder = dtype.numpy_dtype.byteorder elif isinstance(dtype, DatetimeTZDtype): byteorder = dtype.base.byteorder # type: ignore[union-attr] + elif is_extension_array_dtype(dtype): + byteorder = dtype.numpy_dtype.byteorder else: byteorder = dtype.byteorder diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index aeee98f5a125e..068fe08eac2be 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -392,3 +392,12 @@ def test_large_string(): result = pd.api.interchange.from_dataframe(df.__dataframe__()) expected = pd.DataFrame({"a": ["x"]}, dtype="object") tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Int8", "Int8[pyarrow]"]) +def test_nullable_integers(dtype: str) -> None: + pytest.importorskip("pyarrow") + df = pd.DataFrame({"a": [1]}, dtype=dtype) + expected = pd.DataFrame({"a": [1]}, dtype="int8") + result = pd.api.interchange.from_dataframe(df.__dataframe__()) + tm.assert_frame_equal(result, expected) From 2d4ec3a7bf4a2fa142268cedb4404d768f94e43e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:52:41 +0000 Subject: [PATCH 2/4] gh issue number --- pandas/tests/interchange/test_impl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 068fe08eac2be..b5f013f12893b 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -396,6 +396,7 @@ def test_large_string(): @pytest.mark.parametrize("dtype", ["Int8", "Int8[pyarrow]"]) def test_nullable_integers(dtype: str) -> None: + # https://github.com/pandas-dev/pandas/issues/55069 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": [1]}, dtype=dtype) expected = pd.DataFrame({"a": [1]}, dtype="int8") From 6210ba96f63c7047e10448885ab9b4204779e10b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 1 Feb 2024 16:49:36 +0000 Subject: [PATCH 3/4] use BaseMaskedDtype --- pandas/core/interchange/column.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 649b6f77892fe..350cab2c56013 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -11,14 +11,12 @@ from pandas.core.dtypes.dtypes import ( ArrowDtype, + BaseMaskedDtype, DatetimeTZDtype, ) import pandas as pd -from pandas.api.types import ( - is_extension_array_dtype, - is_string_dtype, -) +from pandas.api.types import is_string_dtype from pandas.core.interchange.buffer import PandasBuffer from pandas.core.interchange.dataframe_protocol import ( Column, @@ -146,7 +144,7 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]: byteorder = dtype.numpy_dtype.byteorder elif isinstance(dtype, DatetimeTZDtype): byteorder = dtype.base.byteorder # type: ignore[union-attr] - elif is_extension_array_dtype(dtype): + elif isinstance(dtype, BaseMaskedDtype): byteorder = dtype.numpy_dtype.byteorder else: byteorder = dtype.byteorder From 20463c9880c6e75846d6d9f11bd2e5b7643ff05d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 1 Feb 2024 16:51:15 +0000 Subject: [PATCH 4/4] only skip if int8[pyarrow] --- pandas/tests/interchange/test_impl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index b5f013f12893b..0ec0d923018ae 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -8,6 +8,7 @@ is_ci_environment, is_platform_windows, ) +import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -394,10 +395,11 @@ def test_large_string(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("dtype", ["Int8", "Int8[pyarrow]"]) +@pytest.mark.parametrize( + "dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))] +) def test_nullable_integers(dtype: str) -> None: # https://github.com/pandas-dev/pandas/issues/55069 - pytest.importorskip("pyarrow") df = pd.DataFrame({"a": [1]}, dtype=dtype) expected = pd.DataFrame({"a": [1]}, dtype="int8") result = pd.api.interchange.from_dataframe(df.__dataframe__())