From 0d62751d113d1f94220ff91234a65792ab6803dc Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Mon, 31 Jul 2023 13:48:54 +0100
Subject: [PATCH] respect allow_copy=False in interchange.from_dataframe

---
 doc/source/whatsnew/v2.1.0.rst            |  1 +
 pandas/core/interchange/from_dataframe.py | 11 ++++++-----
 pandas/tests/interchange/test_impl.py     | 13 +++++++++++++
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index b0d2acd9d3490..4e5a53b439fe3 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -684,6 +684,7 @@ Other
 - Bug in :class:`DataFrame` and :class:`Series` raising for data of complex dtype when ``NaN`` values are present (:issue:`53627`)
 - Bug in :class:`DatetimeIndex` where ``repr`` of index passed with time does not print time is midnight and non-day based freq(:issue:`53470`)
 - Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`)
+- Bug in :func:`api.interchange.from_dataframe` was not respecting ``allow_copy`` argument (:issue:`54322`)
 - Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`)
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
 - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
index d3aece6e63798..ac48e5128fe86 100644
--- a/pandas/core/interchange/from_dataframe.py
+++ b/pandas/core/interchange/from_dataframe.py
@@ -67,7 +67,9 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
     if not hasattr(df, "__dataframe__"):
         raise ValueError("`df` does not support __dataframe__")
 
-    return _from_dataframe(df.__dataframe__(allow_copy=allow_copy))
+    return _from_dataframe(
+        df.__dataframe__(allow_copy=allow_copy), allow_copy=allow_copy
+    )
 
 
 def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True):
@@ -451,10 +453,9 @@ def buffer_to_ndarray(
     data_pointer = ctypes.cast(
         buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)
     )
-    return np.ctypeslib.as_array(
-        data_pointer,
-        shape=(length,),
-    )
+    if length > 0:
+        return np.ctypeslib.as_array(data_pointer, shape=(length,))
+    return np.array([], dtype=ctypes_type)
 
 
 def set_nulls(
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
index bfb0eceaa0ca1..30018b06f296f 100644
--- a/pandas/tests/interchange/test_impl.py
+++ b/pandas/tests/interchange/test_impl.py
@@ -286,6 +286,19 @@ def test_empty_pyarrow(data):
     tm.assert_frame_equal(result, expected)
 
 
+def test_multi_chunk_pyarrow() -> None:
+    pa = pytest.importorskip("pyarrow", "11.0.0")
+    n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
+    names = ["n_legs"]
+    table = pa.table([n_legs], names=names)
+    with pytest.raises(
+        RuntimeError,
+        match="To join chunks a copy is required which is "
+        "forbidden by allow_copy=False",
+    ):
+        pd.api.interchange.from_dataframe(table, allow_copy=False)
+
+
 @pytest.mark.parametrize("tz", ["UTC", "US/Pacific"])
 @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
 def test_datetimetzdtype(tz, unit):