From e445f876c30fbef97a3388c1367efd5301d1fe6e Mon Sep 17 00:00:00 2001 From: Andrew Carlson Date: Wed, 6 Nov 2019 14:24:47 -0500 Subject: [PATCH 1/3] add unit tests for issue #19351 --- pandas/tests/test_multilevel.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 79c9fe2b60bd9..41c0a5f4b70c2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -358,6 +358,52 @@ def test_unstack(self): # test that int32 work self.ymd.astype(np.int32).unstack() + def test_unstack_partial(self): + # check for regressions on this issue: https://github.com/pandas-dev/pandas/issues/19351 + # make sure DataFrame.unstack() works when its run on a subset of the DataFrame + # and the Index levels contain values that are not present in the subset + result1 = pd.DataFrame( + [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], + columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3", u"col4"], + ).set_index([u"ix1", "ix2"]) + result1 = result1.iloc[1:2].unstack("ix2") + expected1 = pd.DataFrame( + [[None, None, 30.0, None]], + columns=pd.MultiIndex.from_product( + [["col1", "col2", "col3", "col4"], [2]], names=[None, "ix2"] + ), + index=pd.Index([2], name="ix1"), + ) + tm.assert_frame_equal(result1, expected1) + + result2 = pd.DataFrame( + [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], + columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3"], + ).set_index([u"ix1", "ix2"]) + result2 = result2.iloc[1:2].unstack("ix2") + expected2 = pd.DataFrame( + [[None, None, 30.0]], + columns=pd.MultiIndex.from_product( + [["col1", "col2", "col3"], [2]], names=[None, "ix2"] + ), + index=pd.Index([2], name="ix1"), + ) + tm.assert_frame_equal(result2, expected2) + + result3 = pd.DataFrame( + [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], + columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3"], + ).set_index([u"ix1", "ix2"]) + result3 = result3.iloc[1:2].unstack("ix2") + expected3 = pd.DataFrame( + [[None, None, 30.0]], + columns=pd.MultiIndex.from_product( + [["col1", "col2", "col3"], [None]], names=[None, "ix2"] + ), + index=pd.Index([2], name="ix1"), + ) + tm.assert_frame_equal(result3, expected3) + def test_unstack_multiple_no_empty_columns(self): index = MultiIndex.from_tuples( [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] From 3b17db7ab0f5b54190fa187555980d70e56cde58 Mon Sep 17 00:00:00 2001 From: Andrew Carlson Date: Wed, 6 Nov 2019 14:28:18 -0500 Subject: [PATCH 2/3] fix line too long --- pandas/tests/test_multilevel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 41c0a5f4b70c2..90bc0bdc7c295 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -359,7 +359,8 @@ def test_unstack(self): self.ymd.astype(np.int32).unstack() def test_unstack_partial(self): - # check for regressions on this issue: https://github.com/pandas-dev/pandas/issues/19351 + # check for regressions on this issue: + # https://github.com/pandas-dev/pandas/issues/19351 # make sure DataFrame.unstack() works when its run on a subset of the DataFrame # and the Index levels contain values that are not present in the subset result1 = pd.DataFrame( From 57e7258fa6f37ea5c5a190664f12b859abc757a0 Mon Sep 17 00:00:00 2001 From: Andrew Carlson Date: Wed, 6 Nov 2019 15:02:49 -0500 Subject: [PATCH 3/3] use pytest.mark.parametrize instead --- pandas/tests/test_multilevel.py | 70 ++++++++++++++++----------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 90bc0bdc7c295..a1f58922ea0ca 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -358,52 +358,48 @@ def test_unstack(self): # test that int32 work self.ymd.astype(np.int32).unstack() - def test_unstack_partial(self): + @pytest.mark.parametrize( + "result_rows,result_columns,index_product,expected_row", + [ + ( + [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], + [u"ix1", u"ix2", u"col1", u"col2", u"col3", u"col4"], + 2, + [None, None, 30.0, None], + ), + ( + [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], + [u"ix1", u"ix2", u"col1", u"col2", u"col3"], + 2, + [None, None, 30.0], + ), + ( + [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], + [u"ix1", u"ix2", u"col1", u"col2", u"col3"], + None, + [None, None, 30.0], + ), + ], + ) + def test_unstack_partial( + self, result_rows, result_columns, index_product, expected_row + ): # check for regressions on this issue: # https://github.com/pandas-dev/pandas/issues/19351 # make sure DataFrame.unstack() works when its run on a subset of the DataFrame # and the Index levels contain values that are not present in the subset - result1 = pd.DataFrame( - [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], - columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3", u"col4"], - ).set_index([u"ix1", "ix2"]) - result1 = result1.iloc[1:2].unstack("ix2") - expected1 = pd.DataFrame( - [[None, None, 30.0, None]], - columns=pd.MultiIndex.from_product( - [["col1", "col2", "col3", "col4"], [2]], names=[None, "ix2"] - ), - index=pd.Index([2], name="ix1"), - ) - tm.assert_frame_equal(result1, expected1) - - result2 = pd.DataFrame( - [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], - columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3"], - ).set_index([u"ix1", "ix2"]) - result2 = result2.iloc[1:2].unstack("ix2") - expected2 = pd.DataFrame( - [[None, None, 30.0]], - columns=pd.MultiIndex.from_product( - [["col1", "col2", "col3"], [2]], names=[None, "ix2"] - ), - index=pd.Index([2], name="ix1"), + result = pd.DataFrame(result_rows, columns=result_columns).set_index( + [u"ix1", "ix2"] ) - tm.assert_frame_equal(result2, expected2) - - result3 = pd.DataFrame( - [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], - columns=[u"ix1", u"ix2", u"col1", u"col2", u"col3"], - ).set_index([u"ix1", "ix2"]) - result3 = result3.iloc[1:2].unstack("ix2") - expected3 = pd.DataFrame( - [[None, None, 30.0]], + result = result.iloc[1:2].unstack("ix2") + expected = pd.DataFrame( + [expected_row], columns=pd.MultiIndex.from_product( - [["col1", "col2", "col3"], [None]], names=[None, "ix2"] + [result_columns[2:], [index_product]], names=[None, "ix2"] ), index=pd.Index([2], name="ix1"), ) - tm.assert_frame_equal(result3, expected3) + tm.assert_frame_equal(result, expected) def test_unstack_multiple_no_empty_columns(self): index = MultiIndex.from_tuples(