1
+ from string import ascii_letters as letters
2
+
1
3
import numpy as np
2
4
import pytest
3
5
6
8
import pandas ._testing as tm
7
9
import pandas .core .common as com
8
10
11
+ msg = "A value is trying to be set on a copy of a slice from a DataFrame"
12
+
13
+
14
def random_text(nobs=100):
    """
    Build a one-column DataFrame of random slices of ``letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.

    Returns
    -------
    DataFrame
        A frame with a single ``"letters"`` column. Each value is
        ``letters[start:stop]`` for two randomly drawn, sorted positions,
        so a value may be the empty string when both positions coincide.
    """
    rows = []
    for _ in range(nobs):
        # Draw both endpoints in a single call, then order them so the
        # slice below always runs left-to-right.
        start, stop = np.sort(np.random.randint(len(letters), size=2))
        rows.append([letters[start:stop]])

    return DataFrame(rows, columns=["letters"])
23
+
9
24
10
25
class TestCaching :
11
26
def test_slice_consolidate_invalidate_item_cache (self ):
@@ -30,23 +45,24 @@ def test_slice_consolidate_invalidate_item_cache(self):
30
45
df ._clear_item_cache ()
31
46
tm .assert_almost_equal (df ["bb" ][0 ], 0.17 )
32
47
33
@pytest.mark.parametrize("do_ref", [True, False])
def test_setitem_cache_updating(self, do_ref):
    """
    Setting a value through ``.loc`` must be visible on later reads,
    whether or not the column was accessed beforehand (GH 5424).
    """
    # GH 5424
    words = ["one", "two", "three", "four", "five", "six", "seven"]
    rotated = words[3:] + words[:3]
    frame = DataFrame({"a": words, "b": rotated, "c": np.arange(7)})

    if do_ref:
        # ref the cache: read an element first so the later write happens
        # after the column has already been accessed once
        frame.loc[0, "c"]

    # set it; label 7 is not among the seven original rows
    frame.loc[7, "c"] = 1

    assert frame.loc[0, "c"] == 0.0
    assert frame.loc[7, "c"] == 1.0
49
64
65
+ def test_setitem_cache_updating_slices (self ):
50
66
# GH 7084
51
67
# not updating cache on series setting with slices
52
68
expected = DataFrame (
@@ -146,6 +162,9 @@ def test_detect_chained_assignment(self):
146
162
df ["A" ][1 ] = - 6
147
163
tm .assert_frame_equal (df , expected )
148
164
165
+ @pytest .mark .arm_slow
166
+ def test_detect_chained_assignment_raises (self ):
167
+
149
168
# test with the chaining
150
169
df = DataFrame (
151
170
{
@@ -155,7 +174,6 @@ def test_detect_chained_assignment(self):
155
174
)
156
175
assert df ._is_copy is None
157
176
158
- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
159
177
with pytest .raises (com .SettingWithCopyError , match = msg ):
160
178
df ["A" ][0 ] = - 5
161
179
@@ -164,6 +182,9 @@ def test_detect_chained_assignment(self):
164
182
165
183
assert df ["A" ]._is_copy is None
166
184
185
+ @pytest .mark .arm_slow
186
+ def test_detect_chained_assignment_fails (self ):
187
+
167
188
# Using a copy (the chain), fails
168
189
df = DataFrame (
169
190
{
@@ -175,6 +196,9 @@ def test_detect_chained_assignment(self):
175
196
with pytest .raises (com .SettingWithCopyError , match = msg ):
176
197
df .loc [0 ]["A" ] = - 5
177
198
199
+ @pytest .mark .arm_slow
200
+ def test_detect_chained_assignment_doc_example (self ):
201
+
178
202
# Doc example
179
203
df = DataFrame (
180
204
{
@@ -188,6 +212,9 @@ def test_detect_chained_assignment(self):
188
212
indexer = df .a .str .startswith ("o" )
189
213
df [indexer ]["c" ] = 42
190
214
215
+ @pytest .mark .arm_slow
216
+ def test_detect_chained_assignment_object_dtype (self ):
217
+
191
218
expected = DataFrame ({"A" : [111 , "bbb" , "ccc" ], "B" : [1 , 2 , 3 ]})
192
219
df = DataFrame ({"A" : ["aaa" , "bbb" , "ccc" ], "B" : [1 , 2 , 3 ]})
193
220
@@ -200,6 +227,9 @@ def test_detect_chained_assignment(self):
200
227
df .loc [0 , "A" ] = 111
201
228
tm .assert_frame_equal (df , expected )
202
229
230
+ @pytest .mark .arm_slow
231
+ def test_detect_chained_assignment_is_copy_pickle (self ):
232
+
203
233
# gh-5475: Make sure that is_copy is picked up reconstruction
204
234
df = DataFrame ({"A" : [1 , 2 ]})
205
235
assert df ._is_copy is None
@@ -210,18 +240,10 @@ def test_detect_chained_assignment(self):
210
240
df2 ["B" ] = df2 ["A" ]
211
241
df2 ["B" ] = df2 ["A" ]
212
242
213
- # gh-5597: a spurious raise as we are setting the entire column here
214
- from string import ascii_letters as letters
215
-
216
- def random_text (nobs = 100 ):
217
- df = []
218
- for i in range (nobs ):
219
- idx = np .random .randint (len (letters ), size = 2 )
220
- idx .sort ()
221
-
222
- df .append ([letters [idx [0 ] : idx [1 ]]])
243
+ @pytest .mark .arm_slow
244
+ def test_detect_chained_assignment_setting_entire_column (self ):
223
245
224
- return DataFrame ( df , columns = [ "letters" ])
246
+ # gh-5597: a spurious raise as we are setting the entire column here
225
247
226
248
df = random_text (100000 )
227
249
@@ -239,6 +261,9 @@ def random_text(nobs=100):
239
261
assert df ._is_copy is None
240
262
df ["letters" ] = df ["letters" ].apply (str .lower )
241
263
264
+ @pytest .mark .arm_slow
265
+ def test_detect_chained_assignment_implicit_take (self ):
266
+
242
267
# Implicitly take
243
268
df = random_text (100000 )
244
269
indexer = df .letters .apply (lambda x : len (x ) > 10 )
@@ -247,6 +272,9 @@ def random_text(nobs=100):
247
272
assert df ._is_copy is not None
248
273
df ["letters" ] = df ["letters" ].apply (str .lower )
249
274
275
+ @pytest .mark .arm_slow
276
+ def test_detect_chained_assignment_implicit_take2 (self ):
277
+
250
278
# Implicitly take 2
251
279
df = random_text (100000 )
252
280
indexer = df .letters .apply (lambda x : len (x ) > 10 )
@@ -261,20 +289,32 @@ def random_text(nobs=100):
261
289
df ["letters" ] = df ["letters" ].apply (str .lower )
262
290
assert df ._is_copy is None
263
291
292
@pytest.mark.arm_slow
def test_detect_chained_assignment_str(self):
    """
    Lower-case the long entries of the ``letters`` column using ``.loc``
    with a boolean mask on both sides of the assignment.
    """
    frame = random_text(100000)
    is_long = frame.letters.apply(lambda text: len(text) > 10)

    # Kept as a single statement: read and write both go through .loc
    # with the same mask and column label.
    frame.loc[is_long, "letters"] = frame.loc[is_long, "letters"].apply(str.lower)
267
298
299
@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy(self):
    """
    ``dropna`` on a frame without missing values must not mark the result
    as a copy, so an in-place column update afterwards is allowed.
    """
    # an identical take, so no copy
    frame = DataFrame({"a": [1]}).dropna()
    assert frame._is_copy is None

    # augmented assignment on the column, exactly as a user would write it
    frame["a"] += 1
272
306
307
@pytest.mark.arm_slow
def test_detect_chained_assignment_sorting(self):
    """
    Sorting the first column gives the same Series whether the column is
    selected positionally (``iloc``) or by label (``[]``).
    """
    frame = DataFrame(np.random.randn(10, 4))
    expected = frame.iloc[:, 0].sort_values()

    # positional selection, sorted a second time
    by_position = frame.iloc[:, 0].sort_values()
    tm.assert_series_equal(expected, by_position)

    # label-based selection of the same column
    by_label = frame[0].sort_values()
    tm.assert_series_equal(expected, by_label)
315
+
316
+ @pytest .mark .arm_slow
317
+ def test_detect_chained_assignment_false_positives (self ):
278
318
279
319
# see gh-6025: false positives
280
320
df = DataFrame ({"column1" : ["a" , "a" , "a" ], "column2" : [4 , 8 , 9 ]})
@@ -289,6 +329,9 @@ def random_text(nobs=100):
289
329
df ["column1" ] = df ["column1" ] + "c"
290
330
str (df )
291
331
332
+ @pytest .mark .arm_slow
333
+ def test_detect_chained_assignment_undefined_column (self ):
334
+
292
335
# from SO:
293
336
# https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
294
337
df = DataFrame (np .arange (0 , 9 ), columns = ["count" ])
@@ -297,6 +340,9 @@ def random_text(nobs=100):
297
340
with pytest .raises (com .SettingWithCopyError , match = msg ):
298
341
df .iloc [0 :5 ]["group" ] = "a"
299
342
343
+ @pytest .mark .arm_slow
344
+ def test_detect_chained_assignment_changing_dtype (self ):
345
+
300
346
# Mixed type setting but same dtype & changing dtype
301
347
df = DataFrame (
302
348
{
@@ -324,7 +370,6 @@ def test_setting_with_copy_bug(self):
324
370
)
325
371
mask = pd .isna (df .c )
326
372
327
- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
328
373
with pytest .raises (com .SettingWithCopyError , match = msg ):
329
374
df [["c" ]][mask ] = df [["b" ]][mask ]
330
375
@@ -342,7 +387,6 @@ def test_detect_chained_assignment_warnings_errors(self):
342
387
with tm .assert_produces_warning (com .SettingWithCopyWarning ):
343
388
df .loc [0 ]["A" ] = 111
344
389
345
- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
346
390
with option_context ("chained_assignment" , "raise" ):
347
391
with pytest .raises (com .SettingWithCopyError , match = msg ):
348
392
df .loc [0 ]["A" ] = 111
@@ -386,6 +430,7 @@ def test_cache_updating(self):
386
430
assert "Hello Friend" in df ["A" ].index
387
431
assert "Hello Friend" in df ["B" ].index
388
432
433
+ def test_cache_updating2 (self ):
389
434
# 10264
390
435
df = DataFrame (
391
436
np .zeros ((5 , 5 ), dtype = "int64" ),
0 commit comments