-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: df[col] = arr should not overwrite data in df[col] #35417
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 74 commits
a1ce4fc
b1913b7
e600237
140f5f2
c096c5d
cbd45e8
989ba97
bf6e5f5
a5ffd10
e126ab5
8c4f9f3
f716904
59e447a
09e89ee
11b8093
4271303
ed0af51
dffae7e
a7f363b
53992df
0a133d7
db1b668
858d6cb
3deb0a7
68e8715
3a74222
b0896be
f9498a3
d81a696
33ef890
ea49ae2
1a850c8
fed4782
0633cc6
a00702a
1dd58ab
4ffe7f4
247d0f8
9164c83
080c7e9
b47ed01
0290975
13b683d
7e9ea2d
bd55d67
ae9c707
4dd24ca
253f625
0b28fc0
9c5e6fa
2e0bf61
04af8fa
1d2b6d7
65d466b
13077e9
f013ec3
66d5ed0
f23245b
23e9462
6c8b15f
03b3015
687d262
9164a1e
02585c5
3731fc8
184f013
8503671
34a96f7
a82f62b
fe58441
28e6296
1e802b8
c1ab90f
382def7
4023a16
1e60537
072ef99
51fe3b2
7edd45a
9783bce
df6110e
d7257f2
1f9f9c3
36fb2d4
1292a92
9b127bf
831dc71
1d50325
d72f379
81e92d8
67435f8
ab833aa
017501e
0219f7f
032a55a
51d102b
bd6816b
908b57b
62f2437
36b3302
37c9d22
0862ece
38d8106
647a393
8b8d6a2
9c6e008
8285ece
270be1e
9cf69c9
065869a
ca260c5
5dcdc4a
a03dcc3
a1c5732
47b841a
df060a5
017b817
0821a60
608fb9d
a70fcc2
effd630
4e30881
a254ed0
df7b4d3
8f49c7f
a66afa0
fb95732
c0bace5
9e43fe9
f47fa2e
29d3bba
5b8de9a
79c7ae2
fa2dcea
0e8f671
3cdeeb4
acd3514
d97a1ac
4342f5d
f4dafc6
bebb12f
04475e3
ed6f3ec
fe9fe66
1ae50bf
9d32c62
8972875
6524331
123568d
8bef37a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from pandas.errors import AbstractMethodError, InvalidIndexError | ||
from pandas.util._decorators import doc | ||
|
||
from pandas.core.dtypes.cast import maybe_infer_dtype_type | ||
from pandas.core.dtypes.common import ( | ||
is_array_like, | ||
is_hashable, | ||
|
@@ -675,8 +676,24 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): | |
# GH#38148 | ||
keys = self.obj.columns.union(key, sort=False) | ||
|
||
# Try to get the right dtype when we do this reindex. | ||
Review comment: doesn't infer_dtype_from do this? Reply: that infers a dtype, but we need to pass a scalar fill_value to reindex_axis. Follow-up: removed this edit as no longer necessary. I think at this point all the controversial bits are gone and we're down to just the bugfix. |
||
fv = None | ||
if not is_list_like(value): | ||
fv = value | ||
elif len(value) and not is_list_like(value[0]): | ||
fv = value[0] | ||
else: | ||
dtype = maybe_infer_dtype_type(value) | ||
if dtype is not None: | ||
fv = dtype.type(0) | ||
|
||
self.obj._mgr = self.obj._mgr.reindex_axis( | ||
keys, axis=0, copy=False, consolidate=False, only_slice=True | ||
keys, | ||
axis=0, | ||
copy=False, | ||
consolidate=False, | ||
only_slice=True, | ||
fill_value=fv, | ||
) | ||
|
||
def __setitem__(self, key, value): | ||
|
@@ -1580,7 +1597,6 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): | |
# essentially this separates out the block that is needed | ||
# to possibly be modified | ||
if self.ndim > 1 and i == info_axis: | ||
|
||
# add the new item, and set the value | ||
# must have all defined axes if we have a scalar | ||
# or a list-like on the non-info axes if we have a | ||
|
@@ -1811,6 +1827,13 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): | |
# multi-dim object | ||
# GH#6149 (null slice), GH#10408 (full bounds) | ||
if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): | ||
blk = ser._mgr.blocks[0] | ||
if blk._can_hold_element(value) and is_scalar(value): | ||
# FIXME: ExtensionBlock._can_hold_element | ||
# We can do an inplace-setting, do it directly on _values | ||
# to get our underlying | ||
ser._values[plane_indexer] = value | ||
return | ||
ser = value | ||
else: | ||
# set the item, possibly having a dtype change | ||
|
@@ -1819,7 +1842,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): | |
ser._maybe_update_cacher(clear=True) | ||
|
||
# reset the sliced object if unique | ||
self.obj._iset_item(loc, ser) | ||
self.obj._iset_item(loc, ser, inplace=True) | ||
|
||
def _setitem_single_block(self, indexer, value, name: str): | ||
""" | ||
|
@@ -1845,7 +1868,11 @@ def _setitem_single_block(self, indexer, value, name: str): | |
) | ||
and item_labels.is_unique | ||
): | ||
self.obj[item_labels[indexer[info_axis]]] = value | ||
col = item_labels[indexer[info_axis]] | ||
loc = item_labels.get_loc(col) | ||
if isinstance(value, ABCDataFrame): | ||
return self.obj._set_item_frame_value(col, value) | ||
self.obj._iset_item(loc, value, inplace=True) | ||
return | ||
|
||
indexer = maybe_convert_ix(*indexer) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -704,7 +704,7 @@ def test_identity_slice_returns_new_object(self): | |
assert sliced_df is not original_df | ||
|
||
# should be a shallow copy | ||
original_df["a"] = [4, 4, 4] | ||
original_df.loc[:, "a"] = [4, 4, 4] | ||
Review comment: same comment as above. Reply: updated both tests. |
||
assert (sliced_df["a"] == 4).all() | ||
|
||
original_series = Series([1, 2, 3, 4, 5, 6]) | ||
|
@@ -728,8 +728,8 @@ def test_series_indexing_zerodim_np_array(self): | |
result = s.iloc[np.array(0)] | ||
assert result == 1 | ||
|
||
@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") | ||
def test_iloc_setitem_categorical_updates_inplace(self): | ||
# GH#35417 | ||
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer | ||
cat = Categorical(["A", "B", "C"]) | ||
df = DataFrame({1: cat, 2: [1, 2, 3]}) | ||
|
Uh oh!
There was an error while loading. Please reload this page.