From af1057367679574b60647032ce892d27b67ad40f Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 14 Mar 2014 09:54:40 -0400 Subject: [PATCH] BUG: Bug in DataFrame.reindex(labels,level=0) with reordered labels (GH4088) --- doc/source/release.rst | 7 ++++--- pandas/core/index.py | 34 ++++++++++++++++++++++++--------- pandas/tests/test_multilevel.py | 17 +++++++++++++++++ 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 369f83066ed0d..996e870db2119 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -125,9 +125,9 @@ API Changes DataFrame returned by ``GroupBy.apply`` (:issue:`6124`). This facilitates ``DataFrame.stack`` operations where the name of the column index is used as the name of the inserted column containing the pivoted data. - -- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions - now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A + +- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions + now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. 
A ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments will not be supported in a future release (:issue:`5505`) @@ -254,6 +254,7 @@ Bug Fixes - Bug in ``iloc`` indexing when positional indexer matched Int64Index of corresponding axis no reordering happened (:issue:`6612`) - Bug in ``fillna`` with ``limit`` and ``value`` specified - Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`) +- Bug in ``DataFrame.reindex(labels,level=0)`` with reordered labels (:issue:`4088`) pandas 0.13.1 ------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index 0343475be377e..aad5ccff72e3f 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1426,12 +1426,24 @@ def join(self, other, how='left', level=None, return_indexers=False): else: return self._join_non_unique(other, how=how, return_indexers=return_indexers) - elif self.is_monotonic and other.is_monotonic: - try: - return self._join_monotonic(other, how=how, - return_indexers=return_indexers) - except TypeError: - pass + elif self.is_monotonic: + if other.is_monotonic: + try: + return self._join_monotonic(other, how=how, + return_indexers=return_indexers) + except TypeError: + pass + else: + + # we have a reordering of left by right + if how == 'right': + join_index = self + + if return_indexers: + lindexer = other.get_indexer(self) + return join_index, lindexer, None + else: + return join_index if how == 'left': join_index = self @@ -1516,8 +1528,7 @@ def _join_level(self, other, level, how='left', return_indexers=False): """ The join method *only* affects the level of the resulting MultiIndex. Otherwise it just exactly aligns the Index data to the - labels of the level in the MultiIndex. The order of the data indexed by - the MultiIndex will not be changed (currently) + labels of the level in the MultiIndex. 
""" if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): raise TypeError('Join on level between two MultiIndex objects ' @@ -1556,7 +1567,12 @@ def _join_level(self, other, level, how='left', return_indexers=False): join_index = MultiIndex(levels=new_levels, labels=new_labels, names=left.names, verify_integrity=False) - left_indexer = np.arange(len(left))[new_lev_labels != -1] + + # if we have a reordering of the lindexer, use it + left_indexer = np.arange(len(left))[omit_mask] + lindexer = Int64Index(left_lev_indexer[left_lev_indexer != -1]) + if not lindexer.is_monotonic: + left_indexer = new_lev_labels.argsort() else: join_index = left left_indexer = None diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index aef4e3a72c099..baafc52fac6ef 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -114,6 +114,23 @@ def test_reindex_level(self): expected = self.ymd.groupby(level='month').transform(np.sum).T assert_frame_equal(result, expected) + # GH 4088 + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = lzip(*arrays) + index = MultiIndex.from_tuples(tuples, names=['first', 'second']) + df = DataFrame({"a":list("abcdefgh"), "b":list("abcdefgh")}, index=index) + order = ["baz", "bar", "foo", "qux"] + + arrays = [['baz', 'baz', 'bar', 'bar', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = lzip(*arrays) + index = MultiIndex.from_tuples(tuples, names=['first', 'second']) + expected = DataFrame({"a":list("cdabefgh"), "b":list("cdabefgh")}, index=index) + + result = df.reindex(order, level=0) + assert_frame_equal(result, expected) + def test_binops_level(self): def _check_op(opname): op = getattr(DataFrame, opname)