From f8950346e884d6f0b9131ceaeb588e03c879306f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 5 Nov 2024 08:15:58 +0100 Subject: [PATCH 1/5] TST: add extra test case for np.array(obj, copy=False) read-only behaviour --- pandas/tests/copy_view/test_array.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index bb238d08bd9bd..3cff1abc44986 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -15,8 +15,12 @@ @pytest.mark.parametrize( "method", - [lambda ser: ser.values, lambda ser: np.asarray(ser)], - ids=["values", "asarray"], + [ + lambda ser: ser.values, + lambda ser: np.asarray(ser), + lambda ser: np.array(ser, copy=False), + ], + ids=["values", "asarray", "array"], ) def test_series_values(method): ser = Series([1, 2, 3], name="name") @@ -40,8 +44,12 @@ def test_series_values(method): @pytest.mark.parametrize( "method", - [lambda df: df.values, lambda df: np.asarray(df)], - ids=["values", "asarray"], + [ + lambda df: df.values, + lambda df: np.asarray(df), + lambda ser: np.array(ser, copy=False), + ], + ids=["values", "asarray", "array"], ) def test_dataframe_values(method): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -82,7 +90,7 @@ def test_series_to_numpy(): ser.iloc[0] = 0 assert ser.values[0] == 0 - # specify copy=False gives a writeable array + # specify copy=True gives a writeable array ser = Series([1, 2, 3], name="name") arr = ser.to_numpy(copy=True) assert not np.shares_memory(arr, get_array(ser, "name")) From f1e7115abb3bcbefe3f0ad60db31c592428123e3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 5 Nov 2024 08:29:52 +0100 Subject: [PATCH 2/5] fix remaining case for DataFrame raising an error if copy could not be avoided --- pandas/core/generic.py | 6 ++++++ pandas/tests/copy_view/test_array.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bbd627d4f0d73..8e77e4fecdd7b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2014,6 +2014,12 @@ def empty(self) -> bool: def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None ) -> np.ndarray: + if copy is False and not self._mgr.is_single_block and not self.empty: + # check this manually, otherwise ._values will already return a copy + # and np.array(values, copy=False) will not raise an error + raise ValueError( + "Unable to avoid copy while creating an array as requested." + ) values = self._values if copy is None: # Note: branch avoids `copy=None` for NumPy 1.x support diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 3cff1abc44986..69b03655bda4c 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -138,6 +138,9 @@ def test_dataframe_multiple_numpy_dtypes(): assert not np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is True + with pytest.raises(ValueError, match="Unable to avoid copy while creating"): + arr = np.array(df, copy=False) + def test_values_is_ea(): df = DataFrame({"a": date_range("2012-01-01", periods=3)}) From 230180ceed34d92d94500e7a0483d13d6014f957 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 5 Nov 2024 08:40:12 +0100 Subject: [PATCH 3/5] add test case for df copy=True --- pandas/tests/copy_view/test_array.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 69b03655bda4c..4e63cea936dff 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -141,6 +141,17 @@ def test_dataframe_multiple_numpy_dtypes(): with pytest.raises(ValueError, match="Unable to avoid copy while creating"): arr = np.array(df, copy=False) + arr = np.array(df, copy=True) + assert arr.flags.writeable is True + + +def test_dataframe_single_block_copy_true(): + # the copy=False/None cases are tested above in test_dataframe_values + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + arr = np.array(df, copy=True) + assert not np.shares_memory(arr, get_array(df, "a")) + assert arr.flags.writeable is True + def test_values_is_ea(): df = DataFrame({"a": date_range("2012-01-01", periods=3)}) From 31d7d721deb99dde9ab2483da37318d2a8c54517 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 5 Nov 2024 14:03:48 +0100 Subject: [PATCH 4/5] fixup test for older numpy --- pandas/tests/copy_view/test_array.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 4e63cea936dff..d93846e236127 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + from pandas import ( DataFrame, Series, @@ -138,8 +140,11 @@ def test_dataframe_multiple_numpy_dtypes(): assert not np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is True - with pytest.raises(ValueError, match="Unable to avoid copy while creating"): - arr = np.array(df, copy=False) + if not np_version_gt2: + # copy=False semantics are only supported in NumPy>=2. + + with pytest.raises(ValueError, match="Unable to avoid copy while creating"): + arr = np.array(df, copy=False) arr = np.array(df, copy=True) assert arr.flags.writeable is True From f563fd0ab6c5d050f6c53f3c551bfe19c98a9a48 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 7 Nov 2024 08:55:12 +0100 Subject: [PATCH 5/5] fix np2 check --- pandas/tests/copy_view/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index d93846e236127..2b3ef9201d918 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -140,7 +140,7 @@ def test_dataframe_multiple_numpy_dtypes(): assert not np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is True - if not np_version_gt2: + if np_version_gt2: # copy=False semantics are only supported in NumPy>=2. with pytest.raises(ValueError, match="Unable to avoid copy while creating"):