From 5345d162576e3a3f63bff876904231bff45c7451 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Dec 2023 00:22:29 +0100 Subject: [PATCH 1/4] Adjust pivot tests for new string option --- pandas/tests/reshape/test_pivot.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index dab2b034d3fd4..d12e3d5d68ffb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -781,7 +781,8 @@ def test_pivot_with_list_like_values(self, values, method): codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=[None, "bar"], ) - expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + expected = DataFrame(data=data, index=index, columns=columns) + expected["baz"] = expected["baz"].astype(object) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -824,7 +825,8 @@ def test_pivot_with_list_like_values_nans(self, values, method): codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[None, "foo"], ) - expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + expected = DataFrame(data=data, index=index, columns=columns) + expected["baz"] = expected["baz"].astype(object) tm.assert_frame_equal(result, expected) def test_pivot_columns_none_raise_error(self): @@ -949,7 +951,7 @@ def test_no_col(self, data): # to help with a buglet data.columns = [k * 2 for k in data.columns] - msg = re.escape("agg function failed [how->mean,dtype->object]") + msg = re.escape("agg function failed [how->mean,dtype->") with pytest.raises(TypeError, match=msg): data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") table = data.drop(columns="CC").pivot_table( @@ -1022,7 +1024,7 @@ def test_margin_with_only_columns_defined( } ) if aggfunc != "sum": - msg = re.escape("agg function failed [how->mean,dtype->object]") + msg = re.escape("agg function failed [how->mean,dtype->") with pytest.raises(TypeError, match=msg): df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) if "B" not in columns: @@ -1086,7 +1088,7 @@ def test_pivot_table_multiindex_only(self, cols): expected = DataFrame( [[4.0, 5.0, 6.0]], columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), - index=Index(["v"]), + index=Index(["v"], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -1803,7 +1805,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): margins_name=margins_name, aggfunc=["mean", "max"], ) - ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item") + ix = Index(["bacon", "cheese", margins_name], name="item") tups = [ ("mean", "cost", "ME"), ("mean", "cost", "T"), @@ -1995,7 +1997,7 @@ def test_pivot_table_not_series(self): def test_pivot_margins_name_unicode(self): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" - frame = DataFrame({"foo": [1, 2, 3]}) + frame = DataFrame({"foo": [1, 2, 3]}, columns=Index(["foo"], dtype=object)) table = pivot_table( frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) From b6ab35ad9ecf0f426e8e456f62e923efcb1aec65 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Dec 2023 00:24:46 +0100 Subject: [PATCH 2/4] BUG: pivot dropping wrong column level with numeric columns and ea dtype --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/reshape/reshape.py | 2 +- pandas/tests/reshape/test_pivot.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0c4fb6d3d1164..08ef9ead94efb 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -675,6 +675,7 @@ Reshaping - Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`) - Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`) - Bug in :meth:`DataFrame.pivot_table` where the row margin is incorrect when the columns have numeric names (:issue:`26568`) +- Bug in :meth:`DataFrame.pivot` with numeric columns and extension dtype for data (:issue:`56528`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d6922ba58d2b9..8c822ec58e011 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -572,7 +572,7 @@ def _unstack_extension_series( # equiv: result.droplevel(level=0, axis=1) # but this avoids an extra copy - result.columns = result.columns.droplevel(0) + result.columns = result.columns._drop_level_numbers([0]) return result diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index dab2b034d3fd4..1f84d525a0a89 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2520,8 +2520,9 @@ def test_pivot_empty(self): expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(result, expected, check_names=False) - def test_pivot_integer_bug(self): - df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) + @pytest.mark.parametrize("dtype", [object, "string"]) + def test_pivot_integer_bug(self, dtype): + df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype) result = df.pivot(index=1, columns=0, values=2) tm.assert_index_equal(result.columns, Index(["A", "B"], name=0)) From 89432e2dcc381434391dd83564d3aedd950e0f56 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Dec 2023 00:35:20 +0100 Subject: [PATCH 3/4] Adjust pivot tests for new string option --- pandas/tests/reshape/test_pivot.py | 5 +++++ pandas/tests/reshape/test_union_categoricals.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index c2c3029de6df5..d575a74744330 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_pyarrow_string_dtype + from pandas.errors import PerformanceWarning import pandas as pd @@ -2609,6 +2611,7 @@ def test_pivot_columns_not_given(self): with pytest.raises(TypeError, match="missing 1 required keyword-only argument"): df.pivot() # pylint: disable=missing-kwoa + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_columns_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) @@ -2624,6 +2627,7 @@ def test_pivot_columns_is_none(self): expected = DataFrame({1: 3}, index=Index([2], name="b")) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_index_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) @@ -2637,6 +2641,7 @@ def test_pivot_index_is_none(self): expected = DataFrame(3, index=[1], columns=Index([2], name="b")) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_values_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 7505d69aee134..8d78d34e936f0 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -110,7 +110,7 @@ def test_union_categoricals_nan(self): res = union_categoricals( [ Categorical(np.array([np.nan, np.nan], dtype=object)), - Categorical(["X"]), + Categorical(["X"], categories=pd.Index(["X"], dtype=object)), ] ) exp = Categorical([np.nan, np.nan, "X"]) @@ -123,8 +123,10 @@ def test_union_categoricals_nan(self): tm.assert_categorical_equal(res, exp) @pytest.mark.parametrize("val", [[], ["1"]]) - def test_union_categoricals_empty(self, val): + def test_union_categoricals_empty(self, val, request, using_infer_string): # GH 13759 + if using_infer_string and val == ["1"]: + request.applymarker(pytest.mark.xfail("object and strings dont match")) res = union_categoricals([Categorical([]), Categorical(val)]) exp = Categorical(val) tm.assert_categorical_equal(res, exp) From d75358a965f936b4bae920157b999e1a06a9915f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Dec 2023 18:48:09 +0100 Subject: [PATCH 4/4] Fixup --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1f84d525a0a89..d9ae8ddfb930e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2525,7 +2525,7 @@ def test_pivot_integer_bug(self, dtype): df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype) result = df.pivot(index=1, columns=0, values=2) - tm.assert_index_equal(result.columns, Index(["A", "B"], name=0)) + tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype)) def test_pivot_index_none(self): # GH#3962