From d998337da5c33b94b919968dff6f2a5a7d88ed68 Mon Sep 17 00:00:00 2001 From: Jake VanderPlas Date: Fri, 4 Sep 2015 12:57:19 -0700 Subject: [PATCH 1/2] BUG: quick fix for #10989 TST: add test case from Issue #10989 --- pandas/tools/pivot.py | 14 ++++++++++++++ pandas/tools/tests/test_pivot.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 89fe9463282b6..a4a175fb75716 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -159,6 +159,20 @@ def _add_margins(table, data, values, rows, cols, aggfunc): grand_margin = _compute_grand_margin(data, values, aggfunc) + # categorical index or columns will fail below when 'All' is added + # here we'll convert all categorical indices to object + def convert_categorical(ind): + _convert = lambda ind: (ind.astype('object') + if ind.dtype.name == 'category' else ind) + if isinstance(ind, MultiIndex): + return ind.set_levels([_convert(lev) for lev in ind.levels]) + else: + return _convert(ind) + + table.index = convert_categorical(table.index) + if hasattr(table, 'columns'): + table.columns = convert_categorical(table.columns) + if not values and isinstance(table, Series): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 50ae574c03067..fca208ab24042 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -719,6 +719,20 @@ def test_crosstab_dropna(self): ('two', 'dull'), ('two', 'shiny')]) assert_equal(res.columns.values, m.values) + def test_categorical_margins(self): + # GH 10989 + data = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + data.y = data.y.astype('category') + data.z = data.z.astype('category') + table = data.pivot_table('x', 'y', 'z', margins=True) + assert_equal(table.values, [[1, 2, 1.5], + [5, 6, 5.5], + [3, 4, 3.5]]) + + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 7ca878eab8e7353926c5f24b709a77901e1b35b7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 18 Oct 2015 20:25:57 -0400 Subject: [PATCH 2/2] API: add _to_safe_for_reshape to allow safe insert/append with embedded CategoricalIndexes Signed-off-by: Jeff Reback --- doc/source/whatsnew/v0.17.1.txt | 2 +- pandas/core/index.py | 12 ++++++++++++ pandas/core/internals.py | 6 ++++-- pandas/tools/pivot.py | 31 +++++++++++++++---------------- pandas/tools/tests/test_pivot.py | 22 ++++++++++++++-------- 5 files changed, 46 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index ea2b85d983ade..bdfbf08b37e57 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -87,7 +87,7 @@ Bug Fixes - Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) - +- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`) - Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`) diff --git a/pandas/core/index.py b/pandas/core/index.py index ede848c1103ab..7049ac33feac6 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -627,6 +627,10 @@ def astype(self, dtype): return Index(self.values.astype(dtype), name=self.name, dtype=dtype) + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self + def to_datetime(self, dayfirst=False): """ For an Index containing strings or datetime.datetime objects, attempt @@ -3190,6 +3194,10 @@ def duplicated(self, keep='first'): from pandas.hashtable import duplicated_int64 return duplicated_int64(self.codes.astype('i8'), keep) + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.astype('object') + def get_loc(self, key, method=None): """ Get integer location for requested label @@ -4529,6 +4537,10 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: return result_levels + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.set_levels([ i._to_safe_for_reshape() for i in self.levels ]) + def to_hierarchical(self, n_repeat, n_shuffle=1): """ Return a MultiIndex reshaped to conform to the diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d98121520b8b0..f1d82ec1f3b2e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3427,6 +3427,9 @@ def insert(self, loc, item, value, allow_duplicates=False): if not isinstance(loc, int): raise TypeError("loc must be int") + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc+1)) @@ -3449,8 +3452,7 @@ def insert(self, loc, item, value, allow_duplicates=False): self._blklocs = np.insert(self._blklocs, loc, 0) self._blknos = np.insert(self._blknos, loc, len(self.blocks)) - self.axes[0] = self.items.insert(loc, item) - + self.axes[0] = new_axis self.blocks += (block,) self._shape = None diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index a4a175fb75716..de7a5f5a73f3d 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -159,20 +159,6 @@ def _add_margins(table, data, values, rows, cols, aggfunc): grand_margin = _compute_grand_margin(data, values, aggfunc) - # categorical index or columns will fail below when 'All' is added - # here we'll convert all categorical indices to object - def convert_categorical(ind): - _convert = lambda ind: (ind.astype('object') - if ind.dtype.name == 'category' else ind) - if isinstance(ind, MultiIndex): - return ind.set_levels([_convert(lev) for lev in ind.levels]) - else: - return _convert(ind) - - table.index = convert_categorical(table.index) - if hasattr(table, 'columns'): - table.columns = convert_categorical(table.columns) - if not values and isinstance(table, Series): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. @@ -203,7 +189,13 @@ def convert_categorical(ind): margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names - result = result.append(margin_dummy) + try: + result = result.append(margin_dummy) + except TypeError: + + # we cannot reshape, so coerce the axis + result.index = result.index._to_safe_for_reshape() + result = result.append(margin_dummy) result.index.names = row_names return result @@ -232,6 +224,7 @@ def _compute_grand_margin(data, values, aggfunc): def _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin): + if len(cols) > 0: # need to "interleave" the margins table_pieces = [] @@ -249,7 +242,13 @@ def _all_key(key): # we are going to mutate this, so need to copy! piece = piece.copy() - piece[all_key] = margin[key] + try: + piece[all_key] = margin[key] + except TypeError: + + # we cannot reshape, so coerce the axis + piece.set_axis(cat_axis, piece._get_axis(cat_axis)._to_safe_for_reshape()) + piece[all_key] = margin[key] table_pieces.append(piece) margin_keys.append(all_key) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index fca208ab24042..f0052774d66a2 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -721,17 +721,23 @@ def test_crosstab_dropna(self): def test_categorical_margins(self): # GH 10989 - data = pd.DataFrame({'x': np.arange(8), - 'y': np.arange(8) // 4, - 'z': np.arange(8) % 2}) + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5],[5, 6, 5.5],[3, 4, 3.5]]) + expected.index = Index([0,1,'All'],name='y') + expected.columns = Index([0,1,'All'],name='z') + + data = df.copy() + table = data.pivot_table('x', 'y', 'z', margins=True) + tm.assert_frame_equal(table, expected) + + data = df.copy() data.y = data.y.astype('category') data.z = data.z.astype('category') table = data.pivot_table('x', 'y', 'z', margins=True) - assert_equal(table.values, [[1, 2, 1.5], - [5, 6, 5.5], - [3, 4, 3.5]]) - - + tm.assert_frame_equal(table, expected) if __name__ == '__main__': import nose