diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index c9cd09cba0b39..f82a18f50063a 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -118,7 +118,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are not even (:issue:`12203`) - +- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index (:issue:`12713`) - Bug in ``Period`` and ``PeriodIndex`` creation raises ``KeyError`` if ``freq="Minute"`` is specified. Note that "Minute" freq is deprecated in v0.17.0, and recommended to use ``freq="T"`` instead (:issue:`11854`) - Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`) - Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f3fe5a5a2d5d8..398e37d52d7ba 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -457,19 +457,18 @@ def _set_selection_from_grouper(self): self._group_selection = ax.difference(Index(groupers)).tolist() def _set_result_index_ordered(self, result): - # set the result index on the passed values object - # return the new object - # related 8046 + # set the result index on the passed values object and + # return the new object, xref 8046 # the values/counts are repeated according to the group index - # shortcut of we have an already ordered grouper + # shortcut if we have an already ordered grouper if not self.grouper.is_monotonic: index = Index(np.concatenate( self._get_indices(self.grouper.result_index))) - result.index = index - result = result.sort_index() + result.set_axis(self.axis, index) + result = result.sort_index(axis=self.axis) - result.index = self.obj.index + result.set_axis(self.axis, self.obj._get_axis(self.axis)) return result def _dir_additions(self): diff --git
a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 947daab2017d3..e823874f85cec 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1090,6 +1090,45 @@ def test_transform_broadcast(self): for idx in gp.index: assert_fp_equal(res.xs(idx), agged[idx]) + def test_transform_axis(self): + + # make sure that transform sets the result axes correctly + # when grouping on axis=0 or axis=1, both with the rows in + # their original order and with a non-monotonic indexer + + base = self.tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame(np.random.randn(r, c), + index=base.index, + columns=base.columns, + dtype='float64') + # monotonic: rows kept in their original order + ts = tso + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + # same check on the transposed frame, grouping along axis=1 + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + + # non-monotonic: the first two rows are swapped + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + def test_transform_dtype(self): # GH 9807 # Check transform dtype output is preserved