diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 9a65321398ea4..1ac2a16660f93 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,3 +1,5 @@ +from string import ascii_letters as letters + import numpy as np import pytest @@ -6,6 +8,19 @@ import pandas._testing as tm import pandas.core.common as com +msg = "A value is trying to be set on a copy of a slice from a DataFrame" + + +def random_text(nobs=100): + df = [] + for i in range(nobs): + idx = np.random.randint(len(letters), size=2) + idx.sort() + + df.append([letters[idx[0] : idx[1]]]) + + return DataFrame(df, columns=["letters"]) + class TestCaching: def test_slice_consolidate_invalidate_item_cache(self): @@ -30,23 +45,24 @@ def test_slice_consolidate_invalidate_item_cache(self): df._clear_item_cache() tm.assert_almost_equal(df["bb"][0], 0.17) - def test_setitem_cache_updating(self): + @pytest.mark.parametrize("do_ref", [True, False]) + def test_setitem_cache_updating(self, do_ref): # GH 5424 cont = ["one", "two", "three", "four", "five", "six", "seven"] - for do_ref in [True, False]: - df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) + df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) - # ref the cache - if do_ref: - df.loc[0, "c"] + # ref the cache + if do_ref: + df.loc[0, "c"] - # set it - df.loc[7, "c"] = 1 + # set it + df.loc[7, "c"] = 1 - assert df.loc[0, "c"] == 0.0 - assert df.loc[7, "c"] == 1.0 + assert df.loc[0, "c"] == 0.0 + assert df.loc[7, "c"] == 1.0 + def test_setitem_cache_updating_slices(self): # GH 7084 # not updating cache on series setting with slices expected = DataFrame( @@ -146,6 +162,9 @@ def test_detect_chained_assignment(self): df["A"][1] = -6 tm.assert_frame_equal(df, expected) + @pytest.mark.arm_slow + def test_detect_chained_assignment_raises(self): + # test with the chaining df = DataFrame( { @@ -155,7 +174,6 @@ def test_detect_chained_assignment(self): ) assert df._is_copy is None - msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(com.SettingWithCopyError, match=msg): df["A"][0] = -5 @@ -164,6 +182,9 @@ def test_detect_chained_assignment(self): assert df["A"]._is_copy is None + @pytest.mark.arm_slow + def test_detect_chained_assignment_fails(self): + # Using a copy (the chain), fails df = DataFrame( { @@ -175,6 +196,9 @@ def test_detect_chained_assignment(self): with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[0]["A"] = -5 + @pytest.mark.arm_slow + def test_detect_chained_assignment_doc_example(self): + # Doc example df = DataFrame( { @@ -188,6 +212,9 @@ def test_detect_chained_assignment(self): indexer = df.a.str.startswith("o") df[indexer]["c"] = 42 + @pytest.mark.arm_slow + def test_detect_chained_assignment_object_dtype(self): + expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) @@ -200,6 +227,9 @@ def test_detect_chained_assignment(self): df.loc[0, "A"] = 111 tm.assert_frame_equal(df, expected) + @pytest.mark.arm_slow + def test_detect_chained_assignment_is_copy_pickle(self): + # gh-5475: Make sure that is_copy is picked up reconstruction df = DataFrame({"A": [1, 2]}) assert df._is_copy is None @@ -210,18 +240,10 @@ def test_detect_chained_assignment(self): df2["B"] = df2["A"] df2["B"] = df2["A"] - # gh-5597: a spurious raise as we are setting the entire column here - from string import ascii_letters as letters - - def random_text(nobs=100): - df = [] - for i in range(nobs): - idx = np.random.randint(len(letters), size=2) - idx.sort() - - df.append([letters[idx[0] : idx[1]]]) + @pytest.mark.arm_slow + def test_detect_chained_assignment_setting_entire_column(self): - return DataFrame(df, columns=["letters"]) + # gh-5597: a spurious raise as we are setting the entire column here df = random_text(100000) @@ -239,6 +261,9 @@ def random_text(nobs=100): assert df._is_copy is None df["letters"] = df["letters"].apply(str.lower) + @pytest.mark.arm_slow + def test_detect_chained_assignment_implicit_take(self): + # Implicitly take df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) @@ -247,6 +272,9 @@ def random_text(nobs=100): assert df._is_copy is not None df["letters"] = df["letters"].apply(str.lower) + @pytest.mark.arm_slow + def test_detect_chained_assignment_implicit_take2(self): + # Implicitly take 2 df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) @@ -261,20 +289,32 @@ def random_text(nobs=100): df["letters"] = df["letters"].apply(str.lower) assert df._is_copy is None + @pytest.mark.arm_slow + def test_detect_chained_assignment_str(self): + df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) + @pytest.mark.arm_slow + def test_detect_chained_assignment_is_copy(self): + # an identical take, so no copy df = DataFrame({"a": [1]}).dropna() assert df._is_copy is None df["a"] += 1 + @pytest.mark.arm_slow + def test_detect_chained_assignment_sorting(self): + df = DataFrame(np.random.randn(10, 4)) - s = df.iloc[:, 0].sort_values() + ser = df.iloc[:, 0].sort_values() - tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) - tm.assert_series_equal(s, df[0].sort_values()) + tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(ser, df[0].sort_values()) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_false_positives(self): # see gh-6025: false positives df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]}) @@ -289,6 +329,9 @@ def random_text(nobs=100): df["column1"] = df["column1"] + "c" str(df) + @pytest.mark.arm_slow + def test_detect_chained_assignment_undefined_column(self): + # from SO: # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc df = DataFrame(np.arange(0, 9), columns=["count"]) @@ -297,6 +340,9 @@ def random_text(nobs=100): with pytest.raises(com.SettingWithCopyError, match=msg): df.iloc[0:5]["group"] = "a" + @pytest.mark.arm_slow + def test_detect_chained_assignment_changing_dtype(self): + # Mixed type setting but same dtype & changing dtype df = DataFrame( { @@ -324,7 +370,6 @@ def test_setting_with_copy_bug(self): ) mask = pd.isna(df.c) - msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(com.SettingWithCopyError, match=msg): df[["c"]][mask] = df[["b"]][mask] @@ -342,7 +387,6 @@ def test_detect_chained_assignment_warnings_errors(self): with tm.assert_produces_warning(com.SettingWithCopyWarning): df.loc[0]["A"] = 111 - msg = "A value is trying to be set on a copy of a slice from a DataFrame" with option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[0]["A"] = 111 @@ -386,6 +430,7 @@ def test_cache_updating(self): assert "Hello Friend" in df["A"].index assert "Hello Friend" in df["B"].index + def test_cache_updating2(self): # 10264 df = DataFrame( np.zeros((5, 5), dtype="int64"),