From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/13] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 466d1926cc42709a8bcc7d7399ae29e6680fcdea Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Tue, 30 Jul 2019 21:56:45 +0200 Subject: [PATCH 02/13] Recommit the PR --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/reshape/pivot.py | 28 +++++++++++++++++------- pandas/tests/reshape/test_pivot.py | 35 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index fb67decb46b64..a628f79ea440d 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -129,7 +129,7 @@ Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) -- +- Bug in :meth:`DataFrame.crosstab` when margins set to True and normalize is not False, an error is raised. (:issue:`27500`) - Sparse diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 79716520f6654..2a224c93c7314 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -612,12 +612,17 @@ def _normalize(table, normalize, margins, margins_name="All"): elif margins is True: + # keep index and column of pivoted table + table_index = table.index + table_columns = table.columns + # drop margins created in pivot_table and only keep the core column_margin = table.loc[:, margins_name].drop(margins_name) - index_margin = table.loc[margins_name, :].drop(margins_name) + # separate cases between multiindex and index + if isinstance(table_index, MultiIndex): + index_margin = table.loc[margins_name, :].drop(margins_name, axis=1) + else: + index_margin = table.loc[margins_name, :].drop(margins_name) table = table.drop(margins_name, axis=1).drop(margins_name) - # to keep index and columns names - table_index_names = table.index.names - table_columns_names = table.columns.names # Normalize core table = _normalize(table, normalize=normalize, margins=False) @@ -627,11 +632,19 @@ def _normalize(table, normalize, margins, margins_name="All"): column_margin = column_margin / column_margin.sum() table = concat([table, column_margin], axis=1) table = table.fillna(0) + table.columns = table_columns elif normalize == "index": - index_margin = index_margin / index_margin.sum() + # index_margin is a dataframe, and use a hacky way: sum(axis=1)[0] + # to get the normalized result, and use sum() instead for series + if isinstance(index_margin, ABCDataFrame): + sum_index_margin = index_margin.sum(axis=1)[0] + else: + sum_index_margin = index_margin.sum() + index_margin = index_margin / sum_index_margin table = table.append(index_margin) table = table.fillna(0) + table.index = table_index elif normalize == "all" or normalize is True: column_margin = column_margin / column_margin.sum() @@ -641,13 +654,12 @@ def _normalize(table, normalize, margins, margins_name="All"): table = table.append(index_margin) table = table.fillna(0) + table.index = table_index + table.columns = table_columns else: raise ValueError("Not a valid normalize argument") - table.index.names = table_index_names - table.columns.names = table_columns_names - else: raise ValueError("Not a valid margins argument") diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index be82e7f595f8c..ac12165aa0fd1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2447,3 +2447,38 @@ def test_crosstab_unsorted_order(self): [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns ) tm.assert_frame_equal(result, expected) + + def test_margin_normalize(self): + df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", + "one", "one", "two", "two"], + "C": ["small", "large", "large", "small", + "small", "large", "small", "small", + "large"], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + # normalize on index + result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', + normalize=0) + expected = pd.DataFrame([[0.5, 0.5], [0.5, 0.5], + [0.666667, 0.333333], [0, 1], + [0.444444, 0.555556]]) + expected.index = MultiIndex(levels=[['Sub-Total', 'bar', 'foo'], + ['', 'one', 'two']], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=['A', 'B']) + expected.columns = Index(['large', 'small'], dtype='object', name='C') + tm.assert_frame_equal(result, expected) + + # normalize on columns + result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', + normalize=1) + expected = pd.DataFrame([[0.25, 0.2, 0.222222], [0.25, 0.2, 0.222222], + [0.5, 0.2, 0.333333], [0, 0.4, 0.222222]]) + expected.columns = Index(['large', 'small', 'Sub-Total'], dtype='object', + name='C') + expected.index = MultiIndex(levels=[['bar', 'foo'], ['one', 'two']], + codes=[[1, 1, 2, 2], [1, 2, 1, 2]], + names=['A, B']) + tm.assert_frame_equal(result, expected) From 95869b8f387e8e166bf8491b6be4f78a0f64da36 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Tue, 30 Jul 2019 23:15:37 +0200 Subject: [PATCH 03/13] correct typo --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index ac12165aa0fd1..211669a90b940 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2480,5 +2480,5 @@ def test_margin_normalize(self): name='C') expected.index = MultiIndex(levels=[['bar', 'foo'], ['one', 'two']], codes=[[1, 1, 2, 2], [1, 2, 1, 2]], - names=['A, B']) + names=['A', 'B']) tm.assert_frame_equal(result, expected) From 1b130a65500b479bf95eb630c958c7e4bfb3ecd5 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 08:00:53 +0200 Subject: [PATCH 04/13] Correct codes --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 211669a90b940..74dee9f68f8ea 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2479,6 +2479,6 @@ def test_margin_normalize(self): expected.columns = Index(['large', 'small', 'Sub-Total'], dtype='object', name='C') expected.index = MultiIndex(levels=[['bar', 'foo'], ['one', 'two']], - codes=[[1, 1, 2, 2], [1, 2, 1, 2]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=['A', 'B']) tm.assert_frame_equal(result, expected) From 25302d424a7da7143b8c9f8c21fa9a5b6babd09a Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 19:32:36 +0200 Subject: [PATCH 05/13] Code change based on review --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/reshape/pivot.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index a628f79ea440d..f21d6bfc11988 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -129,7 +129,7 @@ Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) -- Bug in :meth:`DataFrame.crosstab` when margins set to True and normalize is not False, an error is raised. (:issue:`27500`) +- Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. (:issue:`27500`) - Sparse diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 2a224c93c7314..544c1178ac005 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -615,14 +615,13 @@ def _normalize(table, normalize, margins, margins_name="All"): # keep index and column of pivoted table table_index = table.index table_columns = table.columns - # drop margins created in pivot_table and only keep the core - column_margin = table.loc[:, margins_name].drop(margins_name) - # separate cases between multiindex and index - if isinstance(table_index, MultiIndex): - index_margin = table.loc[margins_name, :].drop(margins_name, axis=1) - else: - index_margin = table.loc[margins_name, :].drop(margins_name) - table = table.drop(margins_name, axis=1).drop(margins_name) + + # save the column and index margin + column_margin = table.iloc[: -1, -1] + index_margin = table.iloc[-1, : -1] + + # keep the core table + table = table.iloc[: -1, : -1] # Normalize core table = _normalize(table, normalize=normalize, margins=False) From bf40467137553fb2b6ab253f55925811c8f60306 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 19:44:17 +0200 Subject: [PATCH 06/13] Add more test to test robustness --- pandas/tests/reshape/test_pivot.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 74dee9f68f8ea..57e07d546ecf2 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2482,3 +2482,19 @@ def test_margin_normalize(self): codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=['A', 'B']) tm.assert_frame_equal(result, expected) + + # normalize on both index and column + result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', + normalize=True) + expected = pd.DataFrame([[0.111111, 0.111111, 0.222222], + [0.111111, 0.111111, 0.222222], + [0.222222, 0.111111, 0.333333], + [0.000000, 0.222222, 0.222222], + [0.444444, 0.555555, 1]]) + expected.columns = Index(['large', 'small', 'Sub-Total'], dtype='object', + name='C') + expected.index = MultiIndex(levels=[['Sub-Total', 'bar', 'foo'], + ['', 'one', 'two']], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=['A', 'B']) + tm.assert_frame_equal(result, expected) From 77aafcd07c2a85582f0eaaa538b73a0799dc0152 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 19:49:33 +0200 Subject: [PATCH 07/13] Optimize the code --- pandas/core/reshape/pivot.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 544c1178ac005..94781d533a03d 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -634,13 +634,7 @@ def _normalize(table, normalize, margins, margins_name="All"): table.columns = table_columns elif normalize == "index": - # index_margin is a dataframe, and use a hacky way: sum(axis=1)[0] - # to get the normalized result, and use sum() instead for series - if isinstance(index_margin, ABCDataFrame): - sum_index_margin = index_margin.sum(axis=1)[0] - else: - sum_index_margin = index_margin.sum() - index_margin = index_margin / sum_index_margin + index_margin = index_margin / index_margin.sum() table = table.append(index_margin) table = table.fillna(0) table.index = table_index From b0b90e60b26f85de165456724e77dd1bc5b829c5 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 19:50:38 +0200 Subject: [PATCH 08/13] Mark issue number in test file --- pandas/tests/reshape/test_pivot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 57e07d546ecf2..e2992997b6915 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2449,6 +2449,7 @@ def test_crosstab_unsorted_order(self): tm.assert_frame_equal(result, expected) def test_margin_normalize(self): + # GH 27500 df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], "B": ["one", "one", "one", "two", "two", From 5c13549f1b60619a66e156bf7e2c3d61e96882a6 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 20:40:26 +0200 Subject: [PATCH 09/13] Resubmit --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 94781d533a03d..c0c623362efd8 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -611,7 +611,6 @@ def _normalize(table, normalize, margins, margins_name="All"): table = table.fillna(0) elif margins is True: - # keep index and column of pivoted table table_index = table.index table_columns = table.columns From 469f22ea381ae6572bc51e569f0b55a2df2c02a8 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Wed, 31 Jul 2019 20:45:49 +0200 Subject: [PATCH 10/13] Use black to reformat the file --- pandas/core/reshape/pivot.py | 6 +- pandas/tests/reshape/test_pivot.py | 111 ++++++++++++++++++----------- 2 files changed, 73 insertions(+), 44 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c0c623362efd8..e2db4eab74ddb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -616,11 +616,11 @@ def _normalize(table, normalize, margins, margins_name="All"): table_columns = table.columns # save the column and index margin - column_margin = table.iloc[: -1, -1] - index_margin = table.iloc[-1, : -1] + column_margin = table.iloc[:-1, -1] + index_margin = table.iloc[-1, :-1] # keep the core table - table = table.iloc[: -1, : -1] + table = table.iloc[:-1, :-1] # Normalize core table = _normalize(table, normalize=normalize, margins=False) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e2992997b6915..03b15d2df1a26 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2450,52 +2450,81 @@ def test_crosstab_unsorted_order(self): def test_margin_normalize(self): # GH 27500 - df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - "bar", "bar", "bar", "bar"], - "B": ["one", "one", "one", "two", "two", - "one", "one", "two", "two"], - "C": ["small", "large", "large", "small", - "small", "large", "small", "small", - "large"], - "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], - "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + df = pd.DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) # normalize on index - result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', - normalize=0) - expected = pd.DataFrame([[0.5, 0.5], [0.5, 0.5], - [0.666667, 0.333333], [0, 1], - [0.444444, 0.555556]]) - expected.index = MultiIndex(levels=[['Sub-Total', 'bar', 'foo'], - ['', 'one', 'two']], - codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], - names=['A', 'B']) - expected.columns = Index(['large', 'small'], dtype='object', name='C') + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0 + ) + expected = pd.DataFrame( + [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]] + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + expected.columns = Index(["large", "small"], dtype="object", name="C") tm.assert_frame_equal(result, expected) # normalize on columns - result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', - normalize=1) - expected = pd.DataFrame([[0.25, 0.2, 0.222222], [0.25, 0.2, 0.222222], - [0.5, 0.2, 0.333333], [0, 0.4, 0.222222]]) - expected.columns = Index(['large', 'small', 'Sub-Total'], dtype='object', - name='C') - expected.index = MultiIndex(levels=[['bar', 'foo'], ['one', 'two']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=['A', 'B']) + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1 + ) + expected = pd.DataFrame( + [ + [0.25, 0.2, 0.222222], + [0.25, 0.2, 0.222222], + [0.5, 0.2, 0.333333], + [0, 0.4, 0.222222], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["bar", "foo"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["A", "B"], + ) tm.assert_frame_equal(result, expected) # normalize on both index and column - result = pd.crosstab([df.A, df.B], df.C, margins=True, margins_name='Sub-Total', - normalize=True) - expected = pd.DataFrame([[0.111111, 0.111111, 0.222222], - [0.111111, 0.111111, 0.222222], - [0.222222, 0.111111, 0.333333], - [0.000000, 0.222222, 0.222222], - [0.444444, 0.555555, 1]]) - expected.columns = Index(['large', 'small', 'Sub-Total'], dtype='object', - name='C') - expected.index = MultiIndex(levels=[['Sub-Total', 'bar', 'foo'], - ['', 'one', 'two']], - codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], - names=['A', 'B']) + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True + ) + expected = pd.DataFrame( + [ + [0.111111, 0.111111, 0.222222], + [0.111111, 0.111111, 0.222222], + [0.222222, 0.111111, 0.333333], + [0.000000, 0.222222, 0.222222], + [0.444444, 0.555555, 1], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) tm.assert_frame_equal(result, expected) From 93a98057e6ab05f252f3ad3be5480a335156dd65 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Mon, 5 Aug 2019 08:41:39 +0200 Subject: [PATCH 11/13] Merge master --- doc/source/whatsnew/v0.25.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index a0a5305f286fc..4463dcd0a75aa 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -127,6 +127,7 @@ Reshaping - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) - Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. (:issue:`27500`) - :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) +- Sparse ^^^^^^ From 64c0b139b6d4c22db6cf41ea2b896107c618827a Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Mon, 5 Aug 2019 19:46:46 +0200 Subject: [PATCH 12/13] Add assertion for margin name --- pandas/core/reshape/pivot.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e2db4eab74ddb..0026efa1144af 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -615,7 +615,12 @@ def _normalize(table, normalize, margins, margins_name="All"): table_index = table.index table_columns = table.columns - # save the column and index margin + # save the column and index margin, and check margin name + # is in (for MI cases) or equal to last index/column + if (margins_name not in table.iloc[-1, :].name) | ( + margins_name != table.iloc[:, -1].name + ): + raise ValueError("{} not in pivoted DataFrame".format(margins_name)) column_margin = table.iloc[:-1, -1] index_margin = table.iloc[-1, :-1] From 1e7dc11bfe5244be3f3fb83104ff1498dd778cc8 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Tue, 6 Aug 2019 08:38:58 +0200 Subject: [PATCH 13/13] Rephrase comments and resubmit PR --- pandas/core/reshape/pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0026efa1144af..d653dd87308cf 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -615,8 +615,8 @@ def _normalize(table, normalize, margins, margins_name="All"): table_index = table.index table_columns = table.columns - # save the column and index margin, and check margin name - # is in (for MI cases) or equal to last index/column + # check if margin name is in (for MI cases) or equal to last + # index/column and save the column and index margin if (margins_name not in table.iloc[-1, :].name) | ( margins_name != table.iloc[:, -1].name ):