From 9abc4e8d9ec2d6f9db0a64f1ef0ca1f8dbcaacc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20P=C3=BCls?= Date: Sun, 3 Jul 2016 02:13:46 +0200 Subject: [PATCH 1/4] BUG: Fix .to_excel() for MultiIndex containing a NaN value #13511 --- doc/source/whatsnew/v0.19.0.txt | 2 ++ pandas/formats/format.py | 7 ++++++- pandas/io/tests/test_excel.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 317383e866464..1977548883e9e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -767,3 +767,5 @@ Bug Fixes - Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) - Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`) - Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`) + +- Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 436a9d5d5d4c8..8824ddb4e10b4 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1839,7 +1839,12 @@ def _format_hierarchical_rows(self): for spans, levels, labels in zip(level_lengths, self.df.index.levels, self.df.index.labels): - values = levels.take(labels) + + if levels._can_hold_na: + values = levels.take(labels, fill_value=True) + else: + values = levels.take(labels) + for i in spans: if spans[i] > 1: yield ExcelCell(self.rowcounter + i, gcolidx, diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 55a7f5350719d..ae01072677d92 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1328,6 +1328,20 @@ def test_to_excel_multiindex(self): parse_dates=False) tm.assert_frame_equal(frame, df) + # GH13511 + def test_to_excel_multiindex_nan_label(self): + _skip_if_no_xlrd() + + frame = self.frame + frame.A = np.arange(len(frame)) + frame.iloc[0, 0] = None + frame.set_index(['A', 'B'], inplace=True) + + with ensure_clean(self.ext) as path: + frame.to_excel(path, merge_cells=self.merge_cells) + df = read_excel(path, index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells From ba41db6effd85235c4bc1dccef7b8818718ed356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20P=C3=BCls?= Date: Mon, 4 Jul 2016 13:24:23 +0200 Subject: [PATCH 2/4] CLN: Get rid of conditional. --- pandas/formats/format.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 8824ddb4e10b4..20cc575f7e4f2 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1840,10 +1840,8 @@ def _format_hierarchical_rows(self): self.df.index.levels, self.df.index.labels): - if levels._can_hold_na: - values = levels.take(labels, fill_value=True) - else: - values = levels.take(labels) + values = levels.take(labels, allow_fill=levels._can_hold_na, + fill_value=True) for i in spans: if spans[i] > 1: From 335cf863b68145c921e23c6704829434cad3b1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20P=C3=BCls?= Date: Sun, 3 Jul 2016 02:13:46 +0200 Subject: [PATCH 3/4] BUG: Fix .to_excel() for MultiIndex containing a NaN value #13511 --- pandas/formats/format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 20cc575f7e4f2..50d54ddb95100 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1840,7 +1840,8 @@ def _format_hierarchical_rows(self): self.df.index.levels, self.df.index.labels): - values = levels.take(labels, allow_fill=levels._can_hold_na, + values = levels.take(labels, + allow_fill=levels._can_hold_na, fill_value=True) for i in spans: From 2335ceefa7b8647fcc191cb065a4ed87b3cba88f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20P=C3=BCls?= Date: Sun, 24 Jul 2016 22:47:44 +0200 Subject: [PATCH 4/4] TST: Construct DataFrame specifically for test, instead of reusing existing one. --- pandas/io/tests/test_excel.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index ae01072677d92..34e47ebcfcf5a 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1332,10 +1332,10 @@ def test_to_excel_multiindex(self): def test_to_excel_multiindex_nan_label(self): _skip_if_no_xlrd() - frame = self.frame - frame.A = np.arange(len(frame)) - frame.iloc[0, 0] = None - frame.set_index(['A', 'B'], inplace=True) + frame = pd.DataFrame({'A': [None, 2, 3], + 'B': [10, 20, 30], + 'C': np.random.sample(3)}) + frame = frame.set_index(['A', 'B']) with ensure_clean(self.ext) as path: frame.to_excel(path, merge_cells=self.merge_cells)