diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 0d0f4c66c1fec..3c652aafd7d2a 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -95,6 +95,8 @@ Bug Fixes - Bug in ``pd.eval`` where unary ops in a list error (:issue:`11235`) - Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`) +- Bug in ``describe()`` dropping column names for hierarchical indexes (:issue:`11517`) + diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f46296bb6f70c..99ee50a9ae7fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4516,6 +4516,7 @@ def describe_1d(data, percentiles): if name not in names: names.append(name) d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1) + d.columns.names = data.columns.names return d def _check_percentile(self, q): diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index d29673e96ecdd..22a1c0573d45a 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1471,6 +1471,25 @@ def test_describe_typefiltering_groupby(self): self.assertTrue(G.describe(include=['number', 'object']).shape == (22, 3)) self.assertTrue(G.describe(include='all').shape == (26, 4)) + def test_describe_multi_index_df_column_names(self): + """ Test that column names persist after the describe operation.""" + + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + # GH 11517 + # test for hierarchical index + hierarchical_index_df = df.groupby(['A', 'B']).mean().T + self.assertTrue(hierarchical_index_df.columns.names == ['A', 'B']) + self.assertTrue(hierarchical_index_df.describe().columns.names == ['A', 'B']) + + # test for non-hierarchical index + non_hierarchical_index_df = df.groupby(['A']).mean().T + self.assertTrue(non_hierarchical_index_df.columns.names == ['A']) + self.assertTrue(non_hierarchical_index_df.describe().columns.names == ['A']) + def test_no_order(self): tm._skip_if_no_scipy() s = Series([0, 1, np.nan, 3])