Skip to content

TST: split chaining tests #39684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 8, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 72 additions & 27 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from string import ascii_letters as letters

import numpy as np
import pytest

Expand All @@ -6,6 +8,19 @@
import pandas._testing as tm
import pandas.core.common as com

msg = "A value is trying to be set on a copy of a slice from a DataFrame"


def random_text(nobs=100):
    """
    Build a single-column DataFrame of random substrings of ``letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.

    Returns
    -------
    DataFrame
        One column named "letters"; every value is a contiguous slice of
        ``letters`` (possibly the empty string when both cut points match).
    """
    rows = []
    for _ in range(nobs):
        # Draw two cut points and order them so that start <= stop.
        bounds = np.random.randint(len(letters), size=2)
        bounds.sort()
        start, stop = bounds

        rows.append([letters[start:stop]])

    return DataFrame(rows, columns=["letters"])


class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self):
Expand All @@ -30,23 +45,24 @@ def test_slice_consolidate_invalidate_item_cache(self):
df._clear_item_cache()
tm.assert_almost_equal(df["bb"][0], 0.17)

def test_setitem_cache_updating(self):
@pytest.mark.parametrize("do_ref", [True, False])
def test_setitem_cache_updating(self, do_ref):
# GH 5424
cont = ["one", "two", "three", "four", "five", "six", "seven"]

for do_ref in [True, False]:
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})

# ref the cache
if do_ref:
df.loc[0, "c"]
# ref the cache
if do_ref:
df.loc[0, "c"]

# set it
df.loc[7, "c"] = 1
# set it
df.loc[7, "c"] = 1

assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0
assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0

def test_setitem_cache_updating_slices(self):
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame(
Expand Down Expand Up @@ -146,6 +162,9 @@ def test_detect_chained_assignment(self):
df["A"][1] = -6
tm.assert_frame_equal(df, expected)

@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(self):

# test with the chaining
df = DataFrame(
{
Expand All @@ -155,7 +174,6 @@ def test_detect_chained_assignment(self):
)
assert df._is_copy is None

msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][0] = -5

Expand All @@ -164,6 +182,9 @@ def test_detect_chained_assignment(self):

assert df["A"]._is_copy is None

@pytest.mark.arm_slow
def test_detect_chained_assignment_fails(self):

# Using a copy (the chain), fails
df = DataFrame(
{
Expand All @@ -175,6 +196,9 @@ def test_detect_chained_assignment(self):
with pytest.raises(com.SettingWithCopyError, match=msg):
df.loc[0]["A"] = -5

@pytest.mark.arm_slow
def test_detect_chained_assignment_doc_example(self):

# Doc example
df = DataFrame(
{
Expand All @@ -188,6 +212,9 @@ def test_detect_chained_assignment(self):
indexer = df.a.str.startswith("o")
df[indexer]["c"] = 42

@pytest.mark.arm_slow
def test_detect_chained_assignment_object_dtype(self):

expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

Expand All @@ -200,6 +227,9 @@ def test_detect_chained_assignment(self):
df.loc[0, "A"] = 111
tm.assert_frame_equal(df, expected)

@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy_pickle(self):

# gh-5475: Make sure that is_copy is picked up on reconstruction
df = DataFrame({"A": [1, 2]})
assert df._is_copy is None
Expand All @@ -210,18 +240,10 @@ def test_detect_chained_assignment(self):
df2["B"] = df2["A"]
df2["B"] = df2["A"]

# gh-5597: a spurious raise as we are setting the entire column here
from string import ascii_letters as letters

def random_text(nobs=100):
df = []
for i in range(nobs):
idx = np.random.randint(len(letters), size=2)
idx.sort()

df.append([letters[idx[0] : idx[1]]])
@pytest.mark.arm_slow
def test_detect_chained_assignment_setting_entire_column(self):

return DataFrame(df, columns=["letters"])
# gh-5597: a spurious raise as we are setting the entire column here

df = random_text(100000)

Expand All @@ -239,6 +261,9 @@ def random_text(nobs=100):
assert df._is_copy is None
df["letters"] = df["letters"].apply(str.lower)

@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take(self):

# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
Expand All @@ -247,6 +272,9 @@ def random_text(nobs=100):
assert df._is_copy is not None
df["letters"] = df["letters"].apply(str.lower)

@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take2(self):

# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
Expand All @@ -261,20 +289,32 @@ def random_text(nobs=100):
df["letters"] = df["letters"].apply(str.lower)
assert df._is_copy is None

@pytest.mark.arm_slow
def test_detect_chained_assignment_str(self):
# Lower-case the rows whose text is longer than 10 characters by writing
# through a single .loc call with a boolean mask. There are no asserts:
# the test passes as long as the assignment completes without raising.

df = random_text(100000)
# boolean mask selecting the rows with more than 10 characters
indexer = df.letters.apply(lambda x: len(x) > 10)
# read and write both go through .loc[mask, column] on the same frame
df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)

@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy(self):
# The frame has no missing values, so dropna() has nothing to remove;
# the result is asserted not to be flagged as a copy of another frame.

# an identical take, so no copy
df = DataFrame({"a": [1]}).dropna()
assert df._is_copy is None
# in-place add on the column; nothing is asserted afterwards, so the
# test only requires that this statement does not raise
df["a"] += 1

@pytest.mark.arm_slow
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worthwhile to move the slow tests to a separate file and just mark that file.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not wild about that pattern, but don't have a better alternative ATM. I'm happy enough with how it is here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Maybe leave these decorators, but split anyhow just to make it more obvious?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the follow-up is to see if there's only a subset of these that are actually slow.

def test_detect_chained_assignment_sorting(self):

df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0].sort_values()
ser = df.iloc[:, 0].sort_values()

tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
tm.assert_series_equal(s, df[0].sort_values())
tm.assert_series_equal(ser, df.iloc[:, 0].sort_values())
tm.assert_series_equal(ser, df[0].sort_values())

@pytest.mark.arm_slow
def test_detect_chained_assignment_false_positives(self):

# see gh-6025: false positives
df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
Expand All @@ -289,6 +329,9 @@ def random_text(nobs=100):
df["column1"] = df["column1"] + "c"
str(df)

@pytest.mark.arm_slow
def test_detect_chained_assignment_undefined_column(self):

# from SO:
# https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
df = DataFrame(np.arange(0, 9), columns=["count"])
Expand All @@ -297,6 +340,9 @@ def random_text(nobs=100):
with pytest.raises(com.SettingWithCopyError, match=msg):
df.iloc[0:5]["group"] = "a"

@pytest.mark.arm_slow
def test_detect_chained_assignment_changing_dtype(self):

# Mixed type setting but same dtype & changing dtype
df = DataFrame(
{
Expand Down Expand Up @@ -324,7 +370,6 @@ def test_setting_with_copy_bug(self):
)
mask = pd.isna(df.c)

msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df[["c"]][mask] = df[["b"]][mask]

Expand All @@ -342,7 +387,6 @@ def test_detect_chained_assignment_warnings_errors(self):
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.loc[0]["A"] = 111

msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with option_context("chained_assignment", "raise"):
with pytest.raises(com.SettingWithCopyError, match=msg):
df.loc[0]["A"] = 111
Expand Down Expand Up @@ -386,6 +430,7 @@ def test_cache_updating(self):
assert "Hello Friend" in df["A"].index
assert "Hello Friend" in df["B"].index

def test_cache_updating2(self):
# 10264
df = DataFrame(
np.zeros((5, 5), dtype="int64"),
Expand Down