diff --git a/pandas/conftest.py b/pandas/conftest.py index 254d605e13460..47316a4ba3526 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1987,14 +1987,6 @@ def indexer_ial(request): return request.param -@pytest.fixture -def using_copy_on_write() -> bool: - """ - Fixture to check if Copy-on-Write is enabled. - """ - return True - - @pytest.fixture def using_infer_string() -> bool: """ diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index 56e4b186350f2..eeb19103f7bd5 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -12,189 +12,142 @@ from pandas.tests.copy_view.util import get_array -def test_concat_frames(using_copy_on_write): +def test_concat_frames(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) df_orig = df.copy() result = concat([df, df2], axis=1) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) - else: - assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) result.iloc[0, 0] = "d" - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) result.iloc[0, 1] = "d" - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) tm.assert_frame_equal(df, df_orig) -def test_concat_frames_updating_input(using_copy_on_write): +def test_concat_frames_updating_input(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) result = concat([df, df2], axis=1) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) - else: - assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) expected = result.copy() df.iloc[0, 0] = "d" - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) df2.iloc[0, 0] = "d" - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) tm.assert_frame_equal(result, expected) -def test_concat_series(using_copy_on_write): +def test_concat_series(): ser = Series([1, 2], name="a") ser2 = Series([3, 4], name="b") ser_orig = ser.copy() ser2_orig = ser2.copy() result = concat([ser, ser2], axis=1) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), ser.values) - assert np.shares_memory(get_array(result, "b"), ser2.values) - else: - assert not np.shares_memory(get_array(result, "a"), ser.values) - assert not np.shares_memory(get_array(result, "b"), ser2.values) + assert np.shares_memory(get_array(result, "a"), ser.values) + assert np.shares_memory(get_array(result, "b"), ser2.values) result.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), ser.values) - assert np.shares_memory(get_array(result, "b"), ser2.values) + assert not np.shares_memory(get_array(result, "a"), ser.values) + assert np.shares_memory(get_array(result, "b"), ser2.values) result.iloc[0, 1] = 1000 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), ser2.values) + assert not np.shares_memory(get_array(result, "b"), ser2.values) tm.assert_series_equal(ser, ser_orig) tm.assert_series_equal(ser2, ser2_orig) -def test_concat_frames_chained(using_copy_on_write): +def test_concat_frames_chained(): df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df2 = DataFrame({"c": [4, 5, 6]}) df3 = DataFrame({"d": [4, 5, 6]}) result = concat([concat([df1, df2], axis=1), df3], axis=1) expected = result.copy() - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "c"), get_array(df2, "c")) - assert np.shares_memory(get_array(result, "d"), get_array(df3, "d")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(result, "d"), get_array(df3, "d")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "c"), get_array(df2, "c")) + assert np.shares_memory(get_array(result, "d"), get_array(df3, "d")) df1.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) tm.assert_frame_equal(result, expected) -def test_concat_series_chained(using_copy_on_write): +def test_concat_series_chained(): ser1 = Series([1, 2, 3], name="a") ser2 = Series([4, 5, 6], name="c") ser3 = Series([4, 5, 6], name="d") result = concat([concat([ser1, ser2], axis=1), ser3], axis=1) expected = result.copy() - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) - assert np.shares_memory(get_array(result, "c"), get_array(ser2, "c")) - assert np.shares_memory(get_array(result, "d"), get_array(ser3, "d")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) - assert not np.shares_memory(get_array(result, "c"), get_array(ser2, "c")) - assert not np.shares_memory(get_array(result, "d"), get_array(ser3, "d")) + assert np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) + assert np.shares_memory(get_array(result, "c"), get_array(ser2, "c")) + assert np.shares_memory(get_array(result, "d"), get_array(ser3, "d")) ser1.iloc[0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) tm.assert_frame_equal(result, expected) -def test_concat_series_updating_input(using_copy_on_write): +def test_concat_series_updating_input(): ser = Series([1, 2], name="a") ser2 = Series([3, 4], name="b") expected = DataFrame({"a": [1, 2], "b": [3, 4]}) result = concat([ser, ser2], axis=1) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(ser, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(ser, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) ser.iloc[0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) + assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) tm.assert_frame_equal(result, expected) ser2.iloc[0] = 1000 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) + assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) tm.assert_frame_equal(result, expected) -def test_concat_mixed_series_frame(using_copy_on_write): +def test_concat_mixed_series_frame(): df = DataFrame({"a": [1, 2, 3], "c": 1}) ser = Series([4, 5, 6], name="d") result = concat([df, ser], axis=1) expected = result.copy() - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(result, "c"), get_array(df, "c")) - assert np.shares_memory(get_array(result, "d"), get_array(ser, "d")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(result, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d")) + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(result, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(result, "d"), get_array(ser, "d")) ser.iloc[0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d")) + assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d")) df.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("copy", [True, None, False]) -def test_concat_copy_keyword(using_copy_on_write, copy): +def test_concat_copy_keyword(copy): df = DataFrame({"a": [1, 2]}) df2 = DataFrame({"b": [1.5, 2.5]}) result = concat([df, df2], axis=1, copy=copy) - if using_copy_on_write or copy is False: - assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) @pytest.mark.parametrize( @@ -204,7 +157,7 @@ def test_concat_copy_keyword(using_copy_on_write, copy): lambda df1, df2, **kwargs: merge(df1, df2, **kwargs), ], ) -def test_merge_on_key(using_copy_on_write, func): +def test_merge_on_key(func): df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}) df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]}) df1_orig = df1.copy() @@ -212,28 +165,22 @@ def test_merge_on_key(using_copy_on_write, func): result = func(df1, df2, on="key") - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) - assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) + assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) result.iloc[0, 1] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) result.iloc[0, 2] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) tm.assert_frame_equal(df1, df1_orig) tm.assert_frame_equal(df2, df2_orig) -def test_merge_on_index(using_copy_on_write): +def test_merge_on_index(): df1 = DataFrame({"a": [1, 2, 3]}) df2 = DataFrame({"b": [4, 5, 6]}) df1_orig = df1.copy() @@ -241,21 +188,15 @@ def test_merge_on_index(using_copy_on_write): result = merge(df1, df2, left_index=True, right_index=True) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) result.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) result.iloc[0, 1] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) tm.assert_frame_equal(df1, df1_orig) tm.assert_frame_equal(df2, df2_orig) @@ -267,7 +208,7 @@ def test_merge_on_index(using_copy_on_write): (lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"), ], ) -def test_merge_on_key_enlarging_one(using_copy_on_write, func, how): +def test_merge_on_key_enlarging_one(func, how): df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}) df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]}) df1_orig = df1.copy() @@ -275,45 +216,36 @@ def test_merge_on_key_enlarging_one(using_copy_on_write, func, how): result = func(df1, df2, how=how) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - assert df2._mgr._has_no_reference(1) - assert df2._mgr._has_no_reference(0) - assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is ( - how == "left" - ) - assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert df2._mgr._has_no_reference(1) + assert df2._mgr._has_no_reference(0) + assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is ( + how == "left" + ) + assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) if how == "left": result.iloc[0, 1] = 0 else: result.iloc[0, 2] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) tm.assert_frame_equal(df1, df1_orig) tm.assert_frame_equal(df2, df2_orig) @pytest.mark.parametrize("copy", [True, None, False]) -def test_merge_copy_keyword(using_copy_on_write, copy): +def test_merge_copy_keyword(copy): df = DataFrame({"a": [1, 2]}) df2 = DataFrame({"b": [3, 4.5]}) result = df.merge(df2, copy=copy, left_index=True, right_index=True) - if using_copy_on_write or copy is False: - assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -def test_join_on_key(using_copy_on_write): +def test_join_on_key(): df_index = Index(["a", "b", "c"], name="key") df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True)) @@ -324,29 +256,23 @@ def test_join_on_key(using_copy_on_write): result = df1.join(df2, on="key") - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(result.index), get_array(df1.index)) - assert not np.shares_memory(get_array(result.index), get_array(df2.index)) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(result.index), get_array(df1.index)) + assert not np.shares_memory(get_array(result.index), get_array(df2.index)) result.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) result.iloc[0, 1] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) tm.assert_frame_equal(df1, df1_orig) tm.assert_frame_equal(df2, df2_orig) -def test_join_multiple_dataframes_on_key(using_copy_on_write): +def test_join_multiple_dataframes_on_key(): df_index = Index(["a", "b", "c"], name="key") df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True)) @@ -360,36 +286,24 @@ def test_join_multiple_dataframes_on_key(using_copy_on_write): result = df1.join(dfs_list) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) - assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) - assert np.shares_memory(get_array(result.index), get_array(df1.index)) - assert not np.shares_memory( - get_array(result.index), get_array(dfs_list[0].index) - ) - assert not np.shares_memory( - get_array(result.index), get_array(dfs_list[1].index) - ) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) - assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) + assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) + assert np.shares_memory(get_array(result.index), get_array(df1.index)) + assert not np.shares_memory(get_array(result.index), get_array(dfs_list[0].index)) + assert not np.shares_memory(get_array(result.index), get_array(dfs_list[1].index)) result.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) - assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) - assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) + assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) result.iloc[0, 1] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) - assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) + assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) + assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) result.iloc[0, 2] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) + assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) tm.assert_frame_equal(df1, df1_orig) for df, df_orig in zip(dfs_list, dfs_list_orig): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 73919997fa7fd..e96899d155c54 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -16,7 +16,7 @@ from pandas.tests.copy_view.util import get_array -def test_copy(using_copy_on_write): +def test_copy(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy() @@ -28,49 +28,36 @@ def test_copy(using_copy_on_write): # the deep copy doesn't share memory assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) - if using_copy_on_write: - assert not df_copy._mgr.blocks[0].refs.has_reference() - assert not df_copy._mgr.blocks[1].refs.has_reference() + assert not df_copy._mgr.blocks[0].refs.has_reference() + assert not df_copy._mgr.blocks[1].refs.has_reference() # mutating copy doesn't mutate original df_copy.iloc[0, 0] = 0 assert df.iloc[0, 0] == 1 -def test_copy_shallow(using_copy_on_write): +def test_copy_shallow(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy(deep=False) # the shallow copy also makes a shallow copy of the index - if using_copy_on_write: - assert df_copy.index is not df.index - assert df_copy.columns is not df.columns - assert df_copy.index.is_(df.index) - assert df_copy.columns.is_(df.columns) - else: - assert df_copy.index is df.index - assert df_copy.columns is df.columns + assert df_copy.index is not df.index + assert df_copy.columns is not df.columns + assert df_copy.index.is_(df.index) + assert df_copy.columns.is_(df.columns) # the shallow copy still shares memory assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) - if using_copy_on_write: - assert df_copy._mgr.blocks[0].refs.has_reference() - assert df_copy._mgr.blocks[1].refs.has_reference() - - if using_copy_on_write: - # mutating shallow copy doesn't mutate original - df_copy.iloc[0, 0] = 0 - assert df.iloc[0, 0] == 1 - # mutating triggered a copy-on-write -> no longer shares memory - assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) - # but still shares memory for the other columns/blocks - assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) - else: - # mutating shallow copy does mutate original - df_copy.iloc[0, 0] = 0 - assert df.iloc[0, 0] == 0 - # and still shares memory - assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + assert df_copy._mgr.blocks[0].refs.has_reference() + assert df_copy._mgr.blocks[1].refs.has_reference() + + # mutating shallow copy doesn't mutate original + df_copy.iloc[0, 0] = 0 + assert df.iloc[0, 0] == 1 + # mutating triggered a copy-on-write -> no longer shares memory + assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + # but still shares memory for the other columns/blocks + assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) @pytest.mark.parametrize("copy", [True, None, False]) @@ -113,7 +100,7 @@ def test_copy_shallow(using_copy_on_write): "set_flags", ], ) -def test_methods_copy_keyword(request, method, copy, using_copy_on_write): +def test_methods_copy_keyword(request, method, copy): index = None if "to_timestamp" in request.node.callspec.id: index = period_range("2012-01-01", freq="D", periods=3) @@ -126,18 +113,7 @@ def test_methods_copy_keyword(request, method, copy, using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index) df2 = method(df, copy=copy) - - share_memory = using_copy_on_write or copy is False - - if request.node.callspec.id.startswith("reindex-"): - # TODO copy=False without CoW still returns a copy in this case - if not using_copy_on_write and copy is False: - share_memory = False - - if share_memory: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) @pytest.mark.parametrize("copy", [True, None, False]) @@ -180,7 +156,7 @@ def test_methods_copy_keyword(request, method, copy, using_copy_on_write): "set_flags", ], ) -def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write): +def test_methods_series_copy_keyword(request, method, copy): index = None if "to_timestamp" in request.node.callspec.id: index = period_range("2012-01-01", freq="D", periods=3) @@ -195,32 +171,21 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) ser = Series([1, 2, 3], index=index) ser2 = method(ser, copy=copy) - - share_memory = using_copy_on_write or copy is False - - if share_memory: - assert np.shares_memory(get_array(ser2), get_array(ser)) - else: - assert not np.shares_memory(get_array(ser2), get_array(ser)) + assert np.shares_memory(get_array(ser2), get_array(ser)) @pytest.mark.parametrize("copy", [True, None, False]) -def test_transpose_copy_keyword(using_copy_on_write, copy): +def test_transpose_copy_keyword(copy): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) result = df.transpose(copy=copy) - share_memory = using_copy_on_write or copy is False or copy is None - - if share_memory: - assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) # ----------------------------------------------------------------------------- # DataFrame methods returning new DataFrame using shallow copy -def test_reset_index(using_copy_on_write): +def test_reset_index(): # Case: resetting the index (i.e. adding a new column) + mutating the # resulting dataframe df = DataFrame( @@ -230,28 +195,23 @@ def test_reset_index(using_copy_on_write): df2 = df.reset_index() df2._mgr._verify_integrity() - if using_copy_on_write: - # still shares memory (df2 is a shallow copy) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + # still shares memory (df2 is a shallow copy) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 2] = 0 assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("index", [pd.RangeIndex(0, 2), Index([1, 2])]) -def test_reset_index_series_drop(using_copy_on_write, index): +def test_reset_index_series_drop(index): ser = Series([1, 2], index=index) ser_orig = ser.copy() ser2 = ser.reset_index(drop=True) - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(ser2)) - assert not ser._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(ser), get_array(ser2)) + assert np.shares_memory(get_array(ser), get_array(ser2)) + assert not ser._mgr._has_no_reference(0) ser2.iloc[0] = 100 tm.assert_series_equal(ser, ser_orig) @@ -268,45 +228,39 @@ def test_groupby_column_index_in_references(): tm.assert_frame_equal(result, expected) -def test_rename_columns(using_copy_on_write): +def test_rename_columns(): # Case: renaming columns returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.rename(columns=str.upper) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(df2, expected) tm.assert_frame_equal(df, df_orig) -def test_rename_columns_modify_parent(using_copy_on_write): +def test_rename_columns_modify_parent(): # Case: renaming columns returns a new dataframe # + afterwards modifying the original (parent) dataframe df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df2 = df.rename(columns=str.upper) df2_orig = df2.copy() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) df.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) expected = DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(df, expected) tm.assert_frame_equal(df2, df2_orig) -def test_pipe(using_copy_on_write): +def test_pipe(): df = DataFrame({"a": [1, 2, 3], "b": 1.5}) df_orig = df.copy() @@ -319,18 +273,12 @@ def testfunc(df): # mutating df2 triggers a copy-on-write for that column df2.iloc[0, 0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - expected = DataFrame({"a": [0, 2, 3], "b": 1.5}) - tm.assert_frame_equal(df, expected) - - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) -def test_pipe_modify_df(using_copy_on_write): +def test_pipe_modify_df(): df = DataFrame({"a": [1, 2, 3], "b": 1.5}) df_orig = df.copy() @@ -342,34 +290,24 @@ def testfunc(df): assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - expected = DataFrame({"a": [100, 2, 3], "b": 1.5}) - tm.assert_frame_equal(df, expected) - - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) -def test_reindex_columns(using_copy_on_write): +def test_reindex_columns(): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.reindex(columns=["a", "c"]) - if using_copy_on_write: - # still shares memory (df2 is a shallow copy) - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + # still shares memory (df2 is a shallow copy) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) @@ -383,46 +321,37 @@ def test_reindex_columns(using_copy_on_write): ], ids=["identical", "view", "copy", "values"], ) -def test_reindex_rows(index, using_copy_on_write): +def test_reindex_rows(index): # Case: reindexing the rows with an index that matches the current index # can use a shallow copy df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.reindex(index=index(df.index)) - if using_copy_on_write: - # still shares memory (df2 is a shallow copy) - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + # still shares memory (df2 is a shallow copy) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) -def test_drop_on_column(using_copy_on_write): +def test_drop_on_column(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.drop(columns="a") df2._mgr._verify_integrity() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - else: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) -def test_select_dtypes(using_copy_on_write): +def test_select_dtypes(): # Case: selecting columns using `select_dtypes()` returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -430,40 +359,31 @@ def test_select_dtypes(using_copy_on_write): df2 = df.select_dtypes("int64") df2._mgr._verify_integrity() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "filter_kwargs", [{"items": ["a"]}, {"like": "a"}, {"regex": "a"}] ) -def test_filter(using_copy_on_write, filter_kwargs): +def test_filter(filter_kwargs): # Case: selecting columns using `filter()` returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.filter(**filter_kwargs) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block - if using_copy_on_write: - df2.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_shift_no_op(using_copy_on_write): +def test_shift_no_op(): df = DataFrame( [[1, 2], [3, 4], [5, 6]], index=date_range("2020-01-01", "2020-01-03"), @@ -471,20 +391,15 @@ def test_shift_no_op(using_copy_on_write): ) df_orig = df.copy() df2 = df.shift(periods=0) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) tm.assert_frame_equal(df2, df_orig) -def test_shift_index(using_copy_on_write): +def test_shift_index(): df = DataFrame( [[1, 2], [3, 4], [5, 6]], index=date_range("2020-01-01", "2020-01-03"), @@ -495,7 +410,7 @@ def test_shift_index(using_copy_on_write): assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_shift_rows_freq(using_copy_on_write): +def test_shift_rows_freq(): df = DataFrame( [[1, 2], [3, 4], [5, 6]], index=date_range("2020-01-01", "2020-01-03"), @@ -505,18 +420,13 @@ def test_shift_rows_freq(using_copy_on_write): df_orig.index = date_range("2020-01-02", "2020-01-04") df2 = df.shift(periods=1, freq="1D") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) tm.assert_frame_equal(df2, df_orig) -def test_shift_columns(using_copy_on_write): +def test_shift_columns(): df = DataFrame( [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02") ) @@ -524,18 +434,17 @@ def test_shift_columns(using_copy_on_write): assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")) df.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory( - get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") - ) - expected = DataFrame( - [[np.nan, 1], [np.nan, 3], [np.nan, 5]], - columns=date_range("2020-01-01", "2020-01-02"), - ) - tm.assert_frame_equal(df2, expected) + assert not np.shares_memory( + get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") + ) + expected = DataFrame( + [[np.nan, 1], [np.nan, 3], [np.nan, 5]], + columns=date_range("2020-01-01", "2020-01-02"), + ) + tm.assert_frame_equal(df2, expected) -def test_pop(using_copy_on_write): +def test_pop(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() view_original = df[:] @@ -544,16 +453,11 @@ def test_pop(using_copy_on_write): assert np.shares_memory(result.values, get_array(view_original, "a")) assert np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) - if using_copy_on_write: - result.iloc[0] = 0 - assert not np.shares_memory(result.values, get_array(view_original, "a")) + result.iloc[0] = 0 + assert not np.shares_memory(result.values, get_array(view_original, "a")) df.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) - tm.assert_frame_equal(view_original, df_orig) - else: - expected = DataFrame({"a": [1, 2, 3], "b": [0, 5, 6], "c": [0.1, 0.2, 0.3]}) - tm.assert_frame_equal(view_original, expected) + assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) + tm.assert_frame_equal(view_original, df_orig) @pytest.mark.parametrize( @@ -564,46 +468,35 @@ def test_pop(using_copy_on_write): lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1), ], ) -def test_align_frame(using_copy_on_write, func): +def test_align_frame(func): df = DataFrame({"a": [1, 2, 3], "b": "a"}) df_orig = df.copy() df_changed = df[["b", "a"]].copy() df2, _ = func(df, df_changed) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_align_series(using_copy_on_write): +def test_align_series(): ser = Series([1, 2]) ser_orig = ser.copy() ser_other = ser.copy() ser2, ser_other_result = ser.align(ser_other) - if using_copy_on_write: - assert np.shares_memory(ser2.values, ser.values) - assert np.shares_memory(ser_other_result.values, ser_other.values) - else: - assert not np.shares_memory(ser2.values, ser.values) - assert not np.shares_memory(ser_other_result.values, ser_other.values) - + assert np.shares_memory(ser2.values, ser.values) + assert np.shares_memory(ser_other_result.values, ser_other.values) ser2.iloc[0] = 0 ser_other_result.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2.values, ser.values) - assert not np.shares_memory(ser_other_result.values, ser_other.values) + assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser_other_result.values, ser_other.values) tm.assert_series_equal(ser, ser_orig) tm.assert_series_equal(ser_other, ser_orig) -def test_align_copy_false(using_copy_on_write): +def test_align_copy_false(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() df2, df3 = df.align(df, copy=False) @@ -611,15 +504,14 @@ def test_align_copy_false(using_copy_on_write): assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - if using_copy_on_write: - df2.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged - df3.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged + df3.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged -def test_align_with_series_copy_false(using_copy_on_write): +def test_align_with_series_copy_false(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) ser = Series([1, 2, 3], name="x") ser_orig = ser.copy() @@ -630,15 +522,14 @@ def test_align_with_series_copy_false(using_copy_on_write): assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x")) - if using_copy_on_write: - df2.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged - ser2.loc[0] = 0 - tm.assert_series_equal(ser, ser_orig) # Original is unchanged + ser2.loc[0] = 0 + tm.assert_series_equal(ser, ser_orig) # Original is unchanged -def test_to_frame(using_copy_on_write): +def test_to_frame(): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -650,26 +541,15 @@ def test_to_frame(using_copy_on_write): df.iloc[0, 0] = 0 - if using_copy_on_write: - # mutating df triggers a copy-on-write for that column - assert not np.shares_memory(ser.values, get_array(df, 0)) - tm.assert_series_equal(ser, ser_orig) - else: - # but currently select_dtypes() actually returns a view -> mutates parent - expected = ser_orig.copy() - expected.iloc[0] = 0 - tm.assert_series_equal(ser, expected) + # mutating df triggers a copy-on-write for that column + assert not np.shares_memory(ser.values, get_array(df, 0)) + tm.assert_series_equal(ser, ser_orig) # modify original series -> don't modify dataframe df = ser[:].to_frame() ser.iloc[0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, ser_orig.to_frame()) - else: - expected = ser_orig.copy().to_frame() - expected.iloc[0, 0] = 0 - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, ser_orig.to_frame()) @pytest.mark.parametrize( @@ -682,37 +562,29 @@ def test_to_frame(using_copy_on_write): ], ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], ) -def test_chained_methods(request, method, idx, using_copy_on_write): +def test_chained_methods(request, method, idx): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - # when not using CoW, only the copy() variant actually gives a view - df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy" - # modify df2 -> don't modify df df2 = method(df) df2.iloc[0, idx] = 0 - if not df2_is_view: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) # modify df -> don't modify df2 df2 = method(df) df.iloc[0, 0] = 0 - if not df2_is_view: - tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) + tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})]) -def test_to_timestamp(using_copy_on_write, obj): +def test_to_timestamp(obj): obj.index = Index([Period("2012-1-1", freq="D"), Period("2012-1-2", freq="D")]) obj_orig = obj.copy() obj2 = obj.to_timestamp() - if using_copy_on_write: - assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) - else: - assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) + assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) # mutating obj2 triggers a copy-on-write for that column / block obj2.iloc[0] = 0 @@ -721,16 +593,13 @@ def test_to_timestamp(using_copy_on_write, obj): @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})]) -def test_to_period(using_copy_on_write, obj): +def test_to_period(obj): obj.index = Index([Timestamp("2019-12-31"), Timestamp("2020-12-31")]) obj_orig = obj.copy() obj2 = obj.to_period(freq="Y") - if using_copy_on_write: - assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) - else: - assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) + assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) # mutating obj2 triggers a copy-on-write for that column / block obj2.iloc[0] = 0 @@ -738,16 +607,13 @@ def test_to_period(using_copy_on_write, obj): tm.assert_equal(obj, obj_orig) -def test_set_index(using_copy_on_write): +def test_set_index(): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.set_index("a") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - else: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 1] = 0 @@ -764,20 +630,18 @@ def test_set_index_mutating_parent_does_not_mutate_index(): tm.assert_frame_equal(result, expected) -def test_add_prefix(using_copy_on_write): +def test_add_prefix(): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.add_prefix("CoW_") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c")) expected = DataFrame( {"CoW_a": [0, 2, 3], "CoW_b": [4, 5, 6], "CoW_c": [0.1, 0.2, 0.3]} ) @@ -785,17 +649,15 @@ def test_add_prefix(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_add_suffix(using_copy_on_write): +def test_add_suffix(): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.add_suffix("_CoW") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c")) expected = DataFrame( {"a_CoW": [0, 2, 3], "b_CoW": [4, 5, 6], "c_CoW": [0.1, 0.2, 0.3]} ) @@ -804,36 +666,27 @@ def test_add_suffix(using_copy_on_write): @pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)]) -def test_dropna(using_copy_on_write, axis, val): +def test_dropna(axis, val): df = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"}) df_orig = df.copy() df2 = df.dropna(axis=axis) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("val", [5, 5.5]) -def test_dropna_series(using_copy_on_write, val): +def test_dropna_series(val): ser = Series([1, val, 4]) ser_orig = ser.copy() ser2 = ser.dropna() - - if using_copy_on_write: - assert np.shares_memory(ser2.values, ser.values) - else: - assert not np.shares_memory(ser2.values, ser.values) + assert np.shares_memory(ser2.values, ser.values) ser2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser2.values, ser.values) tm.assert_series_equal(ser, ser_orig) @@ -846,52 +699,40 @@ def test_dropna_series(using_copy_on_write, val): lambda df: df.tail(3), ], ) -def test_head_tail(method, using_copy_on_write): +def test_head_tail(method): df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = method(df) df2._mgr._verify_integrity() - if using_copy_on_write: - # We are explicitly deviating for CoW here to make an eager copy (avoids - # tracking references for very cheap ops) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + # We are explicitly deviating for CoW here to make an eager copy (avoids + # tracking references for very cheap ops) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # modify df2 to trigger CoW for that block df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - # without CoW enabled, head and tail return views. Mutating df2 also mutates df. - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - df2.iloc[0, 0] = 1 + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_infer_objects(using_copy_on_write): +def test_infer_objects(): df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"}) df_orig = df.copy() df2 = df.infer_objects() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) df2.iloc[0, 0] = 0 df2.iloc[0, 1] = "d" - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) tm.assert_frame_equal(df, df_orig) -def test_infer_objects_no_reference(using_copy_on_write): +def test_infer_objects_no_reference(): df = DataFrame( { "a": [1, 2], @@ -912,14 +753,13 @@ def test_infer_objects_no_reference(using_copy_on_write): df.iloc[0, 0] = 0 df.iloc[0, 1] = "d" df.iloc[0, 3] = Timestamp("2018-12-31") - if using_copy_on_write: - assert np.shares_memory(arr_a, get_array(df, "a")) - # TODO(CoW): Block splitting causes references here - assert not np.shares_memory(arr_b, get_array(df, "b")) - assert np.shares_memory(arr_d, get_array(df, "d")) + assert np.shares_memory(arr_a, get_array(df, "a")) + # TODO(CoW): Block splitting causes references here + assert not np.shares_memory(arr_b, get_array(df, "b")) + assert np.shares_memory(arr_d, get_array(df, "d")) -def test_infer_objects_reference(using_copy_on_write): +def test_infer_objects_reference(): df = DataFrame( { "a": [1, 2], @@ -940,10 +780,9 @@ def test_infer_objects_reference(using_copy_on_write): df.iloc[0, 0] = 0 df.iloc[0, 1] = "d" df.iloc[0, 3] = Timestamp("2018-12-31") - if using_copy_on_write: - assert not np.shares_memory(arr_a, get_array(df, "a")) - assert not np.shares_memory(arr_b, get_array(df, "b")) - assert np.shares_memory(arr_d, get_array(df, "d")) + assert not np.shares_memory(arr_a, get_array(df, "a")) + assert not np.shares_memory(arr_b, get_array(df, "b")) + assert np.shares_memory(arr_d, get_array(df, "d")) @pytest.mark.parametrize( @@ -953,103 +792,76 @@ def test_infer_objects_reference(using_copy_on_write): {"before": 0, "after": 1, "axis": 0}, ], ) -def test_truncate(using_copy_on_write, kwargs): +def test_truncate(kwargs): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2}) df_orig = df.copy() df2 = df.truncate(**kwargs) df2._mgr._verify_integrity() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) -def test_assign_drop_duplicates(using_copy_on_write, method): +def test_assign_drop_duplicates(method): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() df2 = getattr(df, method)() df2._mgr._verify_integrity() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})]) -def test_take(using_copy_on_write, obj): +def test_take(obj): # Check that no copy is made when we take all rows in original order obj_orig = obj.copy() obj2 = obj.take([0, 1]) - - if using_copy_on_write: - assert np.shares_memory(obj2.values, obj.values) - else: - assert not np.shares_memory(obj2.values, obj.values) + assert np.shares_memory(obj2.values, obj.values) obj2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(obj2.values, obj.values) + assert not np.shares_memory(obj2.values, obj.values) tm.assert_equal(obj, obj_orig) @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})]) -def test_between_time(using_copy_on_write, obj): +def test_between_time(obj): obj.index = date_range("2018-04-09", periods=2, freq="1D20min") obj_orig = obj.copy() obj2 = obj.between_time("0:00", "1:00") - - if using_copy_on_write: - assert np.shares_memory(obj2.values, obj.values) - else: - assert not np.shares_memory(obj2.values, obj.values) + assert np.shares_memory(obj2.values, obj.values) obj2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(obj2.values, obj.values) + assert not np.shares_memory(obj2.values, obj.values) tm.assert_equal(obj, obj_orig) -def test_reindex_like(using_copy_on_write): +def test_reindex_like(): df = DataFrame({"a": [1, 2], "b": "a"}) other = DataFrame({"b": "a", "a": [1, 2]}) df_orig = df.copy() df2 = df.reindex_like(other) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 1] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_sort_index(using_copy_on_write): +def test_sort_index(): # GH 49473 ser = Series([1, 2, 3]) ser_orig = ser.copy() ser2 = ser.sort_index() - - if using_copy_on_write: - assert np.shares_memory(ser.values, ser2.values) - else: - assert not np.shares_memory(ser.values, ser2.values) + assert np.shares_memory(ser.values, ser2.values) # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 @@ -1061,14 +873,10 @@ def test_sort_index(using_copy_on_write): "obj, kwargs", [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})], ) -def test_sort_values(using_copy_on_write, obj, kwargs): +def test_sort_values(obj, kwargs): obj_orig = obj.copy() obj2 = obj.sort_values(**kwargs) - - if using_copy_on_write: - assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) - else: - assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) + assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a")) # mutating df triggers a copy-on-write for the column / block obj2.iloc[0] = 0 @@ -1080,7 +888,7 @@ def test_sort_values(using_copy_on_write, obj, kwargs): "obj, kwargs", [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})], ) -def test_sort_values_inplace(using_copy_on_write, obj, kwargs): +def test_sort_values_inplace(obj, kwargs): obj_orig = obj.copy() view = obj[:] obj.sort_values(inplace=True, **kwargs) @@ -1089,105 +897,79 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs): # mutating obj triggers a copy-on-write for the column / block obj.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a")) - tm.assert_equal(view, obj_orig) - else: - assert np.shares_memory(get_array(obj, "a"), get_array(view, "a")) + assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a")) + tm.assert_equal(view, obj_orig) @pytest.mark.parametrize("decimals", [-1, 0, 1]) -def test_round(using_copy_on_write, decimals): +def test_round(decimals): df = DataFrame({"a": [1, 2], "b": "c"}) df_orig = df.copy() df2 = df.round(decimals=decimals) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - # TODO: Make inplace by using out parameter of ndarray.round? - if decimals >= 0: - # Ensure lazy copy if no-op - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + # TODO: Make inplace by using out parameter of ndarray.round? + if decimals >= 0: + # Ensure lazy copy if no-op + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 1] = "d" df2.iloc[0, 0] = 4 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_reorder_levels(using_copy_on_write): +def test_reorder_levels(): index = MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] ) df = DataFrame({"a": [1, 2, 3, 4]}, index=index) df_orig = df.copy() df2 = df.reorder_levels(order=["two", "one"]) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_series_reorder_levels(using_copy_on_write): +def test_series_reorder_levels(): index = MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] ) ser = Series([1, 2, 3, 4], index=index) ser_orig = ser.copy() ser2 = ser.reorder_levels(order=["two", "one"]) - - if using_copy_on_write: - assert np.shares_memory(ser2.values, ser.values) - else: - assert not np.shares_memory(ser2.values, ser.values) + assert np.shares_memory(ser2.values, ser.values) ser2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser2.values, ser.values) tm.assert_series_equal(ser, ser_orig) @pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})]) -def test_swaplevel(using_copy_on_write, obj): +def test_swaplevel(obj): index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) obj.index = index obj_orig = obj.copy() obj2 = obj.swaplevel() - - if using_copy_on_write: - assert np.shares_memory(obj2.values, obj.values) - else: - assert not np.shares_memory(obj2.values, obj.values) + assert np.shares_memory(obj2.values, obj.values) obj2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(obj2.values, obj.values) + assert not np.shares_memory(obj2.values, obj.values) tm.assert_equal(obj, obj_orig) -def test_frame_set_axis(using_copy_on_write): +def test_frame_set_axis(): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df.set_axis(["a", "b", "c"], axis="index") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 @@ -1195,16 +977,12 @@ def test_frame_set_axis(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_series_set_axis(using_copy_on_write): +def test_series_set_axis(): # GH 49473 ser = Series([1, 2, 3]) ser_orig = ser.copy() ser2 = ser.set_axis(["a", "b", "c"], axis="index") - - if using_copy_on_write: - assert np.shares_memory(ser, ser2) - else: - assert not np.shares_memory(ser, ser2) + assert np.shares_memory(ser, ser2) # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 @@ -1212,7 +990,7 @@ def test_series_set_axis(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) -def test_set_flags(using_copy_on_write): +def test_set_flags(): ser = Series([1, 2, 3]) ser_orig = ser.copy() ser2 = ser.set_flags(allows_duplicate_labels=False) @@ -1221,47 +999,33 @@ def test_set_flags(using_copy_on_write): # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2, ser) - tm.assert_series_equal(ser, ser_orig) - else: - assert np.shares_memory(ser2, ser) - expected = Series([0, 2, 3]) - tm.assert_series_equal(ser, expected) + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) -def test_rename_axis(using_copy_on_write, kwargs): +def test_rename_axis(kwargs): df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a")) df_orig = df.copy() df2 = df.rename_axis(**kwargs) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)] ) -def test_tz_convert_localize(using_copy_on_write, func, tz): +def test_tz_convert_localize(func, tz): # GH 49473 ser = Series( [1, 2], index=date_range(start="2014-08-01 09:00", freq="h", periods=2, tz=tz) ) ser_orig = ser.copy() ser2 = getattr(ser, func)("US/Central") - - if using_copy_on_write: - assert np.shares_memory(ser.values, ser2.values) - else: - assert not np.shares_memory(ser.values, ser2.values) + assert np.shares_memory(ser.values, ser2.values) # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 @@ -1269,31 +1033,26 @@ def test_tz_convert_localize(using_copy_on_write, func, tz): tm.assert_series_equal(ser, ser_orig) -def test_droplevel(using_copy_on_write): +def test_droplevel(): # GH 49473 index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index) df_orig = df.copy() df2 = df.droplevel(0) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) tm.assert_frame_equal(df, df_orig) -def test_squeeze(using_copy_on_write): +def test_squeeze(): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() series = df.squeeze() @@ -1303,16 +1062,11 @@ def test_squeeze(using_copy_on_write): # mutating squeezed df triggers a copy-on-write for that column/block series.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(series.values, get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - # Without CoW the original will be modified - assert np.shares_memory(series.values, get_array(df, "a")) - assert df.loc[0, "a"] == 0 + assert not np.shares_memory(series.values, get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) -def test_items(using_copy_on_write): +def test_items(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -1325,54 +1079,41 @@ def test_items(using_copy_on_write): # mutating df triggers a copy-on-write for that column / block ser.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(ser, name), get_array(df, name)) - tm.assert_frame_equal(df, df_orig) - else: - # Original frame will be modified - assert df.loc[0, name] == 0 + assert not np.shares_memory(get_array(ser, name), get_array(df, name)) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask(using_copy_on_write, dtype): +def test_putmask(dtype): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) view = df[:] df_orig = df.copy() df[df == df] = 5 - if using_copy_on_write: - assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) - tm.assert_frame_equal(view, df_orig) - else: - # Without CoW the original will be modified - assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) - assert view.iloc[0, 0] == 5 + assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) + tm.assert_frame_equal(view, df_orig) @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask_no_reference(using_copy_on_write, dtype): +def test_putmask_no_reference(dtype): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) arr_a = get_array(df, "a") df[df == df] = 5 - - if using_copy_on_write: - assert np.shares_memory(arr_a, get_array(df, "a")) + assert np.shares_memory(arr_a, get_array(df, "a")) @pytest.mark.parametrize("dtype", ["float64", "Float64"]) -def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): +def test_putmask_aligns_rhs_no_reference(dtype): df = DataFrame({"a": [1.5, 2], "b": 1.5}, dtype=dtype) arr_a = get_array(df, "a") df[df == df] = DataFrame({"a": [5.5, 5]}) - - if using_copy_on_write: - assert np.shares_memory(arr_a, get_array(df, "a")) + assert np.shares_memory(arr_a, get_array(df, "a")) @pytest.mark.parametrize( "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] ) -def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): +def test_putmask_dont_copy_some_blocks(val, exp, warn): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() @@ -1382,19 +1123,13 @@ def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): with tm.assert_produces_warning(warn, match="incompatible dtype"): df[indexer] = val - if using_copy_on_write: - assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) - # TODO(CoW): Could split blocks to avoid copying the whole block - assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp - assert np.shares_memory(get_array(view, "c"), get_array(df, "c")) - assert df._mgr._has_no_reference(1) is not exp - assert not df._mgr._has_no_reference(2) - tm.assert_frame_equal(view, df_orig) - elif val == 5: - # Without CoW the original will be modified, the other case upcasts, e.g. copy - assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(view, "c"), get_array(df, "c")) - assert view.iloc[0, 0] == 5 + assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) + # TODO(CoW): Could split blocks to avoid copying the whole block + assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp + assert np.shares_memory(get_array(view, "c"), get_array(df, "c")) + assert df._mgr._has_no_reference(1) is not exp + assert not df._mgr._has_no_reference(2) + tm.assert_frame_equal(view, df_orig) @pytest.mark.parametrize("dtype", ["int64", "Int64"]) @@ -1405,20 +1140,15 @@ def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): lambda ser: ser.mask(ser <= 0, 10), ], ) -def test_where_mask_noop(using_copy_on_write, dtype, func): +def test_where_mask_noop(dtype, func): ser = Series([1, 2, 3], dtype=dtype) ser_orig = ser.copy() result = func(ser) - - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) - else: - assert not np.shares_memory(get_array(ser), get_array(result)) + assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(ser), get_array(result)) + assert not np.shares_memory(get_array(ser), get_array(result)) tm.assert_series_equal(ser, ser_orig) @@ -1430,7 +1160,7 @@ def test_where_mask_noop(using_copy_on_write, dtype, func): lambda ser: ser.mask(ser >= 0, 10), ], ) -def test_where_mask(using_copy_on_write, dtype, func): +def test_where_mask(dtype, func): ser = Series([1, 2, 3], dtype=dtype) ser_orig = ser.copy() @@ -1448,59 +1178,40 @@ def test_where_mask(using_copy_on_write, dtype, func): lambda df, val: df.mask(df >= 0, val), ], ) -def test_where_mask_noop_on_single_column(using_copy_on_write, dtype, val, func): +def test_where_mask_noop_on_single_column(dtype, val, func): df = DataFrame({"a": [1, 2, 3], "b": [-4, -5, -6]}, dtype=dtype) df_orig = df.copy() result = func(df, val) - - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(result, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) + assert np.shares_memory(get_array(df, "b"), get_array(result, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) result.iloc[0, 1] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) + assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("func", ["mask", "where"]) -def test_chained_where_mask(using_copy_on_write, func): +def test_chained_where_mask(func): df = DataFrame({"a": [1, 4, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) - - with tm.assert_produces_warning(None): - getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) + with tm.raises_chained_assignment_error(): + getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) + tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(None): - getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) + with tm.raises_chained_assignment_error(): + getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) + tm.assert_frame_equal(df, df_orig) -def test_asfreq_noop(using_copy_on_write): +def test_asfreq_noop(): df = DataFrame( {"a": [0.0, None, 2.0, 3.0]}, index=date_range("1/1/2000", periods=4, freq="min"), ) df_orig = df.copy() df2 = df.asfreq(freq="min") - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 @@ -1509,17 +1220,16 @@ def test_asfreq_noop(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_iterrows(using_copy_on_write): +def test_iterrows(): df = DataFrame({"a": 0, "b": 1}, index=[1, 2, 3]) df_orig = df.copy() for _, sub in df.iterrows(): sub.iloc[0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_interpolate_creates_copy(using_copy_on_write): +def test_interpolate_creates_copy(): # GH#51126 df = DataFrame({"a": [1.5, np.nan, 3]}) view = df[:] @@ -1527,48 +1237,33 @@ def test_interpolate_creates_copy(using_copy_on_write): df.ffill(inplace=True) df.iloc[0, 0] = 100.5 - - if using_copy_on_write: - tm.assert_frame_equal(view, expected) - else: - expected = DataFrame({"a": [100.5, 1.5, 3]}) - tm.assert_frame_equal(view, expected) + tm.assert_frame_equal(view, expected) -def test_isetitem(using_copy_on_write): +def test_isetitem(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() df2 = df.copy(deep=None) # Trigger a CoW df2.isetitem(1, np.array([-1, -2, -3])) # This is inplace - - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - else: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.loc[0, "a"] = 0 tm.assert_frame_equal(df, df_orig) # Original is unchanged - - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - else: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_isetitem_series(using_copy_on_write, dtype): +def test_isetitem_series(dtype): df = DataFrame({"a": [1, 2, 3], "b": np.array([4, 5, 6], dtype=dtype)}) ser = Series([7, 8, 9]) ser_orig = ser.copy() df.isetitem(0, ser) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), get_array(ser)) - assert not df._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df, "a"), get_array(ser)) + assert not df._mgr._has_no_reference(0) # mutating dataframe doesn't update series df.loc[0, "a"] = 0 @@ -1584,17 +1279,13 @@ def test_isetitem_series(using_copy_on_write, dtype): tm.assert_frame_equal(df, expected) -def test_isetitem_frame(using_copy_on_write): +def test_isetitem_frame(): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2}) rhs = DataFrame({"a": [4, 5, 6], "b": 2}) df.isetitem([0, 1], rhs) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), get_array(rhs, "a")) - assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) - assert not df._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(rhs, "a")) - assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(rhs, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert not df._mgr._has_no_reference(0) expected = df.copy() rhs.iloc[0, 0] = 100 rhs.iloc[0, 1] = 100 @@ -1602,7 +1293,7 @@ def test_isetitem_frame(using_copy_on_write): @pytest.mark.parametrize("key", ["a", ["a"]]) -def test_get(using_copy_on_write, key): +def test_get(key): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() @@ -1618,7 +1309,7 @@ def test_get(using_copy_on_write, key): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_xs(using_copy_on_write, axis, key, dtype): +def test_xs(axis, key, dtype): single_block = dtype == "int64" df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} @@ -1632,15 +1323,13 @@ def test_xs(using_copy_on_write, axis, key, dtype): else: assert result._mgr._has_no_reference(0) - if using_copy_on_write or single_block: - result.iloc[0] = 0 - + result.iloc[0] = 0 tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)]) -def test_xs_multiindex(using_copy_on_write, key, level, axis): +def test_xs_multiindex(key, level, axis): arr = np.arange(18).reshape(6, 3) index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"]) df = DataFrame(arr, index=index, columns=list("abc")) @@ -1659,7 +1348,7 @@ def test_xs_multiindex(using_copy_on_write, key, level, axis): tm.assert_frame_equal(df, df_orig) -def test_update_frame(using_copy_on_write): +def test_update_frame(): df1 = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}) df2 = DataFrame({"b": [100.0]}, index=[1]) df1_orig = df1.copy() @@ -1668,16 +1357,13 @@ def test_update_frame(using_copy_on_write): expected = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 100.0, 6.0]}) tm.assert_frame_equal(df1, expected) - if using_copy_on_write: - # df1 is updated, but its view not - tm.assert_frame_equal(view, df1_orig) - assert np.shares_memory(get_array(df1, "a"), get_array(view, "a")) - assert not np.shares_memory(get_array(df1, "b"), get_array(view, "b")) - else: - tm.assert_frame_equal(view, expected) + # df1 is updated, but its view not + tm.assert_frame_equal(view, df1_orig) + assert np.shares_memory(get_array(df1, "a"), get_array(view, "a")) + assert not np.shares_memory(get_array(df1, "b"), get_array(view, "b")) -def test_update_series(using_copy_on_write): +def test_update_series(): ser1 = Series([1.0, 2.0, 3.0]) ser2 = Series([100.0], index=[1]) ser1_orig = ser1.copy() @@ -1687,11 +1373,8 @@ def test_update_series(using_copy_on_write): expected = Series([1.0, 100.0, 3.0]) tm.assert_series_equal(ser1, expected) - if using_copy_on_write: - # ser1 is updated, but its view not - tm.assert_series_equal(view, ser1_orig) - else: - tm.assert_series_equal(view, expected) + # ser1 is updated, but its view not + tm.assert_series_equal(view, ser1_orig) def test_update_chained_assignment(): @@ -1707,71 +1390,58 @@ def test_update_chained_assignment(): tm.assert_frame_equal(df, df_orig) -def test_inplace_arithmetic_series(using_copy_on_write): +def test_inplace_arithmetic_series(): ser = Series([1, 2, 3]) ser_orig = ser.copy() data = get_array(ser) ser *= 2 - if using_copy_on_write: - # https://github.com/pandas-dev/pandas/pull/55745 - # changed to NOT update inplace because there is no benefit (actual - # operation already done non-inplace). This was only for the optics - # of updating the backing array inplace, but we no longer want to make - # that guarantee - assert not np.shares_memory(get_array(ser), data) - tm.assert_numpy_array_equal(data, get_array(ser_orig)) - else: - assert np.shares_memory(get_array(ser), data) - tm.assert_numpy_array_equal(data, get_array(ser)) + # https://github.com/pandas-dev/pandas/pull/55745 + # changed to NOT update inplace because there is no benefit (actual + # operation already done non-inplace). This was only for the optics + # of updating the backing array inplace, but we no longer want to make + # that guarantee + assert not np.shares_memory(get_array(ser), data) + tm.assert_numpy_array_equal(data, get_array(ser_orig)) -def test_inplace_arithmetic_series_with_reference(using_copy_on_write): +def test_inplace_arithmetic_series_with_reference(): ser = Series([1, 2, 3]) ser_orig = ser.copy() view = ser[:] ser *= 2 - if using_copy_on_write: - assert not np.shares_memory(get_array(ser), get_array(view)) - tm.assert_series_equal(ser_orig, view) - else: - assert np.shares_memory(get_array(ser), get_array(view)) + assert not np.shares_memory(get_array(ser), get_array(view)) + tm.assert_series_equal(ser_orig, view) @pytest.mark.parametrize("copy", [True, False]) -def test_transpose(using_copy_on_write, copy): +def test_transpose(copy): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() result = df.transpose(copy=copy) - - if not copy or using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) result.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_transpose_different_dtypes(using_copy_on_write): +def test_transpose_different_dtypes(): df = DataFrame({"a": [1, 2, 3], "b": 1.5}) df_orig = df.copy() result = df.T assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) result.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_transpose_ea_single_column(using_copy_on_write): +def test_transpose_ea_single_column(): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") result = df.T assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) -def test_transform_frame(using_copy_on_write): +def test_transform_frame(): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() @@ -1780,11 +1450,10 @@ def func(ser): return ser df.transform(func) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_transform_series(using_copy_on_write): +def test_transform_series(): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1793,8 +1462,7 @@ def func(ser): return ser ser.transform(func) - if using_copy_on_write: - tm.assert_series_equal(ser, ser_orig) + tm.assert_series_equal(ser, ser_orig) def test_count_read_only_array(): @@ -1805,36 +1473,30 @@ def test_count_read_only_array(): tm.assert_series_equal(result, expected) -def test_insert_series(using_copy_on_write): +def test_insert_series(): df = DataFrame({"a": [1, 2, 3]}) ser = Series([1, 2, 3]) ser_orig = ser.copy() df.insert(loc=1, value=ser, column="b") - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(df, "b")) - assert not df._mgr._has_no_reference(1) - else: - assert not np.shares_memory(get_array(ser), get_array(df, "b")) + assert np.shares_memory(get_array(ser), get_array(df, "b")) + assert not df._mgr._has_no_reference(1) df.iloc[0, 1] = 100 tm.assert_series_equal(ser, ser_orig) -def test_eval(using_copy_on_write): +def test_eval(): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() result = df.eval("c = a+b") - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) - else: - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) result.iloc[0, 0] = 100 tm.assert_frame_equal(df, df_orig) -def test_eval_inplace(using_copy_on_write): +def test_eval_inplace(): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() df_view = df[:] @@ -1843,11 +1505,10 @@ def test_eval_inplace(using_copy_on_write): assert np.shares_memory(get_array(df, "a"), get_array(df_view, "a")) df.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df_view, df_orig) + tm.assert_frame_equal(df_view, df_orig) -def test_apply_modify_row(using_copy_on_write): +def test_apply_modify_row(): # Case: applying a function on each row as a Series object, where the # function mutates the row object (which needs to trigger CoW if row is a view) df = DataFrame({"A": [1, 2], "B": [3, 4]}) @@ -1859,10 +1520,7 @@ def transform(row): df.apply(transform, axis=1) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.loc[0, "B"] == 100 + tm.assert_frame_equal(df, df_orig) # row Series is a copy df = DataFrame({"A": [1, 2], "B": ["b", "c"]}) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 43d1c74d76db2..6c108847c2bc6 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -89,16 +89,6 @@ def test_preserve_getitem(self): assert df.loc[[0]].flags.allows_duplicate_labels is False assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False - def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write): - if not using_copy_on_write: - request.applymarker(pytest.mark.xfail(reason="Unclear behavior.")) - # NDFrame.__getitem__ will cache the first df['A']. May need to - # invalidate that cache? Update the cached entries? - df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) - assert df["A"].flags.allows_duplicate_labels is False - df.flags.allows_duplicate_labels = True - assert df["A"].flags.allows_duplicate_labels is True - @pytest.mark.parametrize( "objs, kwargs", [ diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index a7873594ecade..8d173d850583f 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -12,7 +12,7 @@ class TestPeriodIndex: - def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): + def test_getitem_periodindex_duplicates_string_slice(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN") ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) @@ -22,10 +22,7 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 - if using_copy_on_write: - tm.assert_series_equal(ts, original) - else: - assert (ts[1:3] == 1).all() + tm.assert_series_equal(ts, original) # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="Y-JUN") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 79c3780642e7d..d7ef2d39e8df6 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -32,7 +32,7 @@ class TestCommon: @pytest.mark.parametrize("name", [None, "new_name"]) - def test_to_frame(self, name, index_flat, using_copy_on_write): + def test_to_frame(self, name, index_flat): # see GH#15230, GH#22580 idx = index_flat @@ -46,8 +46,6 @@ def test_to_frame(self, name, index_flat, using_copy_on_write): assert df.index is idx assert len(df.columns) == 1 assert df.columns[0] == idx_name - if not using_copy_on_write: - assert df[idx_name].values is not idx.values df = idx.to_frame(index=False, name=idx_name) assert df.index is not idx diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 4faac0e96abc8..92addeb29252a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -753,7 +753,7 @@ def test_reindex_items(self): mgr.iget(3).internal_values(), reindexed.iget(3).internal_values() ) - def test_get_numeric_data(self, using_copy_on_write): + def test_get_numeric_data(self): mgr = create_mgr( "int: int; float: float; complex: complex;" "str: object; bool: bool; obj: object; dt: datetime", @@ -774,18 +774,12 @@ def test_get_numeric_data(self, using_copy_on_write): np.array([100.0, 200.0, 300.0]), inplace=True, ) - if using_copy_on_write: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([1.0, 1.0, 1.0]), - ) - else: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([100.0, 200.0, 300.0]), - ) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([1.0, 1.0, 1.0]), + ) - def test_get_bool_data(self, using_copy_on_write): + def test_get_bool_data(self): mgr = create_mgr( "int: int; float: float; complex: complex;" "str: object; bool: bool; obj: object; dt: datetime", @@ -801,16 +795,10 @@ def test_get_bool_data(self, using_copy_on_write): ) bools.iset(0, np.array([True, False, True]), inplace=True) - if using_copy_on_write: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, True, True]), - ) - else: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, False, True]), - ) + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, True, True]), + ) def test_unicode_repr_doesnt_raise(self): repr(create_mgr("b,\u05d0: object")) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 774f6b61df517..2f9e968dd1b71 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -438,7 +438,7 @@ def test_context_manageri_user_provided(all_parsers, datapath): @skip_pyarrow # ParserError: Empty CSV file -def test_file_descriptor_leak(all_parsers, using_copy_on_write): +def test_file_descriptor_leak(all_parsers): # GH 31488 parser = all_parsers with tm.ensure_clean() as path: diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6f6252e3929fb..3cba7b7da347e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_copy_on_write - from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( pa_version_under11p0, @@ -425,15 +423,10 @@ def test_read_filters(self, engine, tmp_path): repeat=1, ) - def test_write_index(self, engine, using_copy_on_write, request): - check_names = engine != "fastparquet" - if using_copy_on_write and engine == "fastparquet": - request.applymarker( - pytest.mark.xfail(reason="fastparquet write into index") - ) - + def test_write_index(self): + pytest.importorskip("pyarrow") df = pd.DataFrame({"A": [1, 2, 3]}) - check_round_trip(df, engine) + check_round_trip(df, "pyarrow") indexes = [ [2, 3, 4], @@ -446,12 +439,12 @@ def test_write_index(self, engine, using_copy_on_write, request): df.index = index if isinstance(index, pd.DatetimeIndex): df.index = df.index._with_freq(None) # freq doesn't round-trip - check_round_trip(df, engine, check_names=check_names) + check_round_trip(df, "pyarrow") # index with meta-data df.index = [0, 1, 2] df.index.name = "foo" - check_round_trip(df, engine) + check_round_trip(df, "pyarrow") def test_write_multiindex(self, pa): # Not supported in fastparquet as of 0.1.3 or older pyarrow version @@ -1256,23 +1249,6 @@ def test_error_on_using_partition_cols_and_partition_on( partition_cols=partition_cols, ) - @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") - def test_empty_dataframe(self, fp): - # GH #27339 - df = pd.DataFrame() - expected = df.copy() - check_round_trip(df, fp, expected=expected) - - @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") - def test_timezone_aware_index(self, fp, timezone_aware_date_list): - idx = 5 * [timezone_aware_date_list] - - df = pd.DataFrame(index=idx, data={"index_as_col": idx}) - - expected = df.copy() - expected.index.name = "index" - check_round_trip(df, fp, expected=expected) - def test_close_file_handle_on_read_error(self): with tm.ensure_clean("test.parquet") as path: pathlib.Path(path).write_bytes(b"breakit") @@ -1361,10 +1337,3 @@ def test_invalid_dtype_backend(self, engine): df.to_parquet(path) with pytest.raises(ValueError, match=msg): read_parquet(path, dtype_backend="numpy") - - @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") - def test_empty_columns(self, fp): - # GH 52034 - df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")) - expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")) - check_round_trip(df, fp, expected=expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fda51b157cd75..4e2af9fef377b 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -36,26 +36,20 @@ def test_reindex(self, multiindex_dataframe_random_data): tm.assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels( - self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write + self, multiindex_year_month_day_dataframe_random_data ): ymd = multiindex_year_month_day_dataframe_random_data new_index = ymd.index[::10] chunk = ymd.reindex(new_index) - if using_copy_on_write: - assert chunk.index.is_(new_index) - else: - assert chunk.index is new_index + assert chunk.index.is_(new_index) chunk = ymd.loc[new_index] assert chunk.index.equals(new_index) ymdT = ymd.T chunk = ymdT.reindex(columns=new_index) - if using_copy_on_write: - assert chunk.columns.is_(new_index) - else: - assert chunk.columns is new_index + assert chunk.columns.is_(new_index) chunk = ymdT.loc[:, new_index] assert chunk.columns.equals(new_index)