From 30246a7d2b113b7115fd0708655ae8e1c3f2e6c5 Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Fri, 3 Oct 2014 18:02:39 -0400 Subject: [PATCH] type diversity breaks alignment --- doc/source/v0.15.0.txt | 1 + pandas/core/indexing.py | 24 ++++++--------- pandas/tests/test_frame.py | 1 + pandas/tests/test_indexing.py | 58 ++++++++++++++++++++++++++++++++--- 4 files changed, 66 insertions(+), 18 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 2776f6f9fcb35..cfd66a3a3829e 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -1007,3 +1007,4 @@ Bug Fixes - Bug in Index.intersection on non-monotonic non-unique indexes (:issue:`8362`). - Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`) - Bug in NDFrame.equals gives false negatives with dtype=object (:issue:`8437`) +- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 27a31a13a0259..8a77cc85efced 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -439,16 +439,10 @@ def can_do_equal_len(): if isinstance(value, ABCDataFrame) and value.ndim > 1: for item in labels: - # align to - if item in value: - v = value[item] - i = self.obj[item].index - v = v.reindex(i & v.index) - - setter(item, v.values) - else: - setter(item, np.nan) + v = np.nan if item not in value else \ + self._align_series(indexer[0], value[item]) + setter(item, v) # we have an equal len ndarray/convertible to our labels elif np.array(value).ndim == 2: @@ -511,6 +505,10 @@ def _align_series(self, indexer, ser): if isinstance(indexer, tuple): + # flatten np.ndarray indexers + ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i + indexer = tuple(map(ravel, indexer)) + aligners = [not _is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -536,12 +534,11 @@ def _align_series(self, 
indexer, ser): # series, so need to broadcast (see GH5206) if (sum_aligners == self.ndim and all([com._is_sequence(_) for _ in indexer])): - ser = ser.reindex(obj.axes[0][indexer[0].ravel()], - copy=True).values + ser = ser.reindex(obj.axes[0][indexer[0]], copy=True).values # single indexer if len(indexer) > 1: - l = len(indexer[1].ravel()) + l = len(indexer[1]) ser = np.tile(ser, l).reshape(l, -1).T return ser @@ -557,7 +554,7 @@ def _align_series(self, indexer, ser): if not is_list_like(new_ix): new_ix = Index([new_ix]) else: - new_ix = Index(new_ix.ravel()) + new_ix = Index(new_ix) if ser.index.equals(new_ix) or not len(new_ix): return ser.values.copy() @@ -1765,4 +1762,3 @@ def _maybe_droplevels(index, key): pass return index - diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 700eb8591fdf9..79c8647948cd1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1405,6 +1405,7 @@ def test_setitem_frame(self): # key is unaligned with values f = self.mixed_frame.copy() piece = f.ix[:2, ['A']] + piece.index = f.index[-2:] key = (slice(-2, None), ['A', 'B']) f.ix[key] = piece piece['B'] = np.nan diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 7831fe811c2ff..7213aaafd1376 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1031,11 +1031,12 @@ def test_loc_setitem_frame(self): def test_loc_setitem_frame_multiples(self): - # multiple setting df = DataFrame({ 'A' : ['foo','bar','baz'], 'B' : Series(range(3),dtype=np.int64) }) - df.loc[0:1] = df.loc[1:2] + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs expected = DataFrame({ 'A' : ['bar','baz','baz'], 'B' : Series([1,2,2],dtype=np.int64) }) assert_frame_equal(df, expected) @@ -1047,8 +1048,9 @@ def test_loc_setitem_frame_multiples(self): expected = DataFrame({ 'date' : [Timestamp('20000101'),Timestamp('20000102'),Timestamp('20000101'), Timestamp('20000102'),Timestamp('20000103')], 'val' : 
Series([0,1,0,1,2],dtype=np.int64) }) - - df.loc[2:4] = df.loc[0:2] + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs assert_frame_equal(df, expected) def test_iloc_getitem_frame(self): @@ -3987,6 +3989,54 @@ def test_float_index_at_iat(self): for i in range(len(s)): self.assertEqual(s.iat[i], i + 1) + def test_rhs_alignment(self): + # GH8258, tests that both rows & columns are aligned to what is + # assigned to. covers both uniform data-type & multi-type cases + def run_tests(df, rhs, right): + # label, index, slice + r, i, s = list('bcd'), [1, 2, 3], slice(1, 4) + c, j, l = ['joe', 'jolie'], [1, 2], slice(1, 3) + + left = df.copy() + left.loc[r, c] = rhs + assert_frame_equal(left, right) + + left = df.copy() + left.iloc[i, j] = rhs + assert_frame_equal(left, right) + + left = df.copy() + left.ix[s, l] = rhs + assert_frame_equal(left, right) + + left = df.copy() + left.ix[i, j] = rhs + assert_frame_equal(left, right) + + left = df.copy() + left.ix[r, c] = rhs + assert_frame_equal(left, right) + + xs = np.arange(20).reshape(5, 4) + cols = ['jim', 'joe', 'jolie', 'joline'] + df = pd.DataFrame(xs, columns=cols, index=list('abcde')) + + # right hand side; permute the indices and multiply by -2 + rhs = - 2 * df.iloc[3:0:-1, 2:0:-1] + + # expected `right` result; just multiply by -2 + right = df.copy() + right.iloc[1:4, 1:3] *= -2 + + # run tests with uniform dtypes + run_tests(df, rhs, right) + + # make frames multi-type & re-run tests + for frame in [df, rhs, right]: + frame['joe'] = frame['joe'].astype('float64') + frame['jolie'] = frame['jolie'].map('@{0}'.format) + + run_tests(df, rhs, right) class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [