From 2ad6f8b887fdccb42b4919bdd44e2dbe950e24a5 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 9 Nov 2022 19:13:56 +0100 Subject: [PATCH 1/7] change copy default to None and test CoW in set_axis --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/tests/copy_view/test_methods.py | 34 +++++++++++++++++++++ pandas/tests/frame/methods/test_set_axis.py | 34 +++++++++++---------- 4 files changed, 54 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 21b3a0c033702..a3fa01184466b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4942,7 +4942,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool = True, + copy: bool = None, ) -> DataFrame: return super().set_axis(labels, axis=axis, copy=copy) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7a40e30c0ae7a..d01199bcc8a42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -700,7 +700,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool_t = True, + copy: bool_t = None, ) -> NDFrameT: """ Assign desired index to given axis. 
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index f5c7b31e59bc5..22d7c19dc68f0 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -356,3 +356,37 @@ def test_reorder_levels(using_copy_on_write): if using_copy_on_write: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) + + +def test_frame_set_axis(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_series_set_axis(using_copy_on_write): + # GH 49473 + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(ser, ser2) + else: + assert not np.shares_memory(ser, ser2) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 7efd4434f8412..8a194d68fd3e7 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -21,7 +21,7 @@ def test_set_axis(self, obj): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) - def test_set_axis_copy(self, obj): + def test_set_axis_copy(self, obj, using_copy_on_write): # Test copy keyword GH#47932 new_index = list("abcd")[: len(obj)] @@ -32,14 +32,15 @@ 
def test_set_axis_copy(self, obj): result = obj.set_axis(new_index, axis=0, copy=True) tm.assert_equal(expected, result) assert result is not obj - # check we DID make a copy - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) + if not using_copy_on_write: + # check we DID make a copy + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) @@ -58,13 +59,14 @@ def test_set_axis_copy(self, obj): tm.assert_equal(expected, result) assert result is not obj # check we DID make a copy - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) + if not using_copy_on_write: + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) res = obj.set_axis(new_index, copy=False) tm.assert_equal(expected, res) From 8a17c6730e08d0a20ad80d62313bf9954fbb94ec Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 9 Nov 2022 23:48:44 +0100 Subject: [PATCH 2/7] fix mypy/pylint errors --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a3fa01184466b..32443457f5c98 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4942,7 +4942,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool = None, + copy: bool | None = None, ) -> DataFrame: return super().set_axis(labels, axis=axis, copy=copy) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d01199bcc8a42..7389e629d60f9 100644 --- 
a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -700,7 +700,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool_t = None, + copy: bool_t | None = None, ) -> NDFrameT: """ Assign desired index to given axis. @@ -734,7 +734,9 @@ def set_axis( return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): + def _set_axis_nocheck( + self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None + ): if inplace: setattr(self, self._get_axis_name(axis), labels) else: From 656ddd854f0757b4d220effa3011b82fd11bae54 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sat, 12 Nov 2022 14:26:40 +0100 Subject: [PATCH 3/7] change copy default to None in series.set_axis --- pandas/core/series.py | 2 +- pandas/tests/frame/methods/test_set_axis.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdf92e1dcf02..087bc0693a14f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4809,7 +4809,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool = True, + copy: bool | None = None, ) -> Series: return super().set_axis(labels, axis=axis, copy=copy) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 8a194d68fd3e7..e88dbf09d5269 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -41,6 +41,14 @@ def test_set_axis_copy(self, obj, using_copy_on_write): tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) + else: + if obj.ndim == 1: + assert tm.shares_memory(result, obj) + else: + assert any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) @@ -67,6 +75,14 @@ def test_set_axis_copy(self, obj, using_copy_on_write): 
tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) + else: + if obj.ndim == 1: + assert tm.shares_memory(result, obj) + else: + assert any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) res = obj.set_axis(new_index, copy=False) tm.assert_equal(expected, res) From d6f98ff58a64f310da5b29051694b41400c3ffe4 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sat, 12 Nov 2022 14:33:13 +0100 Subject: [PATCH 4/7] fix error in test for when copy is True --- pandas/tests/frame/methods/test_set_axis.py | 23 +++++++-------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index e88dbf09d5269..a487983d66cf4 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -32,23 +32,14 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0, copy=True) tm.assert_equal(expected, result) assert result is not obj - if not using_copy_on_write: - # check we DID make a copy - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) + # check we DID make a copy + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) else: - if obj.ndim == 1: - assert tm.shares_memory(result, obj) - else: - assert any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) From 9f2e183d4798aac58a1f50a26501e373b127d6b9 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sat, 12 Nov 2022 18:09:45 +0100 Subject: [PATCH 5/7] improve test --- pandas/tests/frame/methods/test_set_axis.py | 13 +++++++------ 
1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index a487983d66cf4..fd140e0098f2a 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -57,20 +57,21 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj - # check we DID make a copy - if not using_copy_on_write: + if using_copy_on_write: + # check we DID NOT make a copy if obj.ndim == 1: - assert not tm.shares_memory(result, obj) + assert tm.shares_memory(result, obj) else: - assert not any( + assert any( tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) else: + # check we DID make a copy if obj.ndim == 1: - assert tm.shares_memory(result, obj) + assert not tm.shares_memory(result, obj) else: - assert any( + assert not any( tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) From e17dffd5f9382ca8495d90d4caaf6bd3e8643270 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 30 Nov 2022 05:24:44 +0100 Subject: [PATCH 6/7] add tests for series axis --- pandas/tests/copy_view/test_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 22d7c19dc68f0..e610a3b466735 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -389,4 +389,4 @@ def test_series_set_axis(using_copy_on_write): # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 assert not np.shares_memory(ser2, ser) - tm.assert_series_equal(ser, ser_orig) + tm.assert_series_equal(ser, ser_orig) \ No newline at end of file From f5e1d5e408ecc026f700608efd33c6f920e10d46 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 28 Dec 2022 14:36:57 +0100 Subject: [PATCH 7/7] 
resolve merge conflicts --- pandas/tests/copy_view/test_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index e610a3b466735..22d7c19dc68f0 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -389,4 +389,4 @@ def test_series_set_axis(using_copy_on_write): # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 assert not np.shares_memory(ser2, ser) - tm.assert_series_equal(ser, ser_orig) \ No newline at end of file + tm.assert_series_equal(ser, ser_orig)