From 22a6b37386dfcd8007aa89875e2e7390449dce44 Mon Sep 17 00:00:00 2001 From: Stefan Song Date: Tue, 13 Aug 2024 00:17:21 -0400 Subject: [PATCH 1/4] fix bug for default level behavior --- pandas/core/generic.py | 6 +++++- pandas/tests/series/indexing/test_xs.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0f0078fc3398b..39255f954dd3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4126,7 +4126,11 @@ class animal locomotion index = self.index if isinstance(index, MultiIndex): - loc, new_index = index._get_loc_level(key, level=0) + loc, new_index = index.get_loc_level( + key, + level=range(len(key)), + drop_level=drop_level + ) if not drop_level: if lib.is_integer(loc): # Slice index must be an integer or None diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index a67f3ec708f24..5f093042f3b00 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -1,7 +1,9 @@ import numpy as np import pytest +import debugpy from pandas import ( + DataFrame, MultiIndex, Series, date_range, @@ -80,3 +82,16 @@ def test_xs_key_as_list(self): with pytest.raises(TypeError, match="list keys are not supported"): ser.xs(["a"], axis=0, drop_level=False) + + def test_xs_default_level(self): + # GH#59098 + df = DataFrame(dict(i=[1,2,3], j=[1,1,2], x=[10, 100, 1000])).set_index(["i", "j"]) + key = (1, 1) + + # Both scenarios should return DataFrame + result_with_level = df.xs(key, drop_level=False, level=list(range(len(key)))) + result_with_default = df.xs(key, drop_level=False) + + assert type(result_with_default) == DataFrame + assert type(result_with_level) == type(result_with_default) + tm.assert_equal(result_with_level, result_with_default) From f151dbac225d56446937b5959e08112082c98ba9 Mon Sep 17 00:00:00 2001 From: Stefan Song Date: Tue, 13 Aug 2024 00:37:11 -0400 Subject: [PATCH 2/4] add 
note and fix styling --- doc/source/whatsnew/v3.0.0.rst | 18 ++++++++++++++++++ pandas/tests/series/indexing/test_xs.py | 3 +-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f26c6506477d4..dddf81fa59aff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -122,6 +122,24 @@ These improvements also fixed certain bugs in groupby: - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) +.. _whatsnew_300.notable_bug_fixes.xs_function_default_level: + +:meth:`DataFrame.xs` returned a :class:`Series` instead of a :class:`DataFrame` when called with a tuple key, ``drop_level=False`` and the default ``level`` (:issue:`59098`). +For example: + +.. ipython:: python + + df = pd.DataFrame(dict(i=[1,2,3], j=[1,1,2], x=[10, 100, 1000])).set_index(["i", "j"]) + + key = (1, 1) + + # Returns DataFrame as expected: + result1 = df.xs(key, drop_level=False, level=list(range(len(key)))) + + # Previously returned a Series; now returns a DataFrame like result1: + result2 = df.xs(key, drop_level=False) + + ..
_whatsnew_300.notable_bug_fixes.notable_bug_fix2: notable_bug_fix2 diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index 5f093042f3b00..d853d9e7ff509 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -1,6 +1,5 @@ import numpy as np import pytest -import debugpy from pandas import ( DataFrame, @@ -82,7 +81,7 @@ def test_xs_key_as_list(self): with pytest.raises(TypeError, match="list keys are not supported"): ser.xs(["a"], axis=0, drop_level=False) - + def test_xs_default_level(self): # GH#59098 df = DataFrame(dict(i=[1,2,3], j=[1,1,2], x=[10, 100, 1000])).set_index(["i", "j"]) From 1a37e0e67b7f938227be0ba90a9021fdae9d1c1b Mon Sep 17 00:00:00 2001 From: Stefan Song Date: Tue, 13 Aug 2024 08:20:55 -0400 Subject: [PATCH 3/4] fix level and mypy warning --- pandas/core/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 39255f954dd3c..e84c28a77774a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,6 +16,7 @@ ClassVar, Literal, NoReturn, + Sequence, cast, final, overload, @@ -4126,9 +4127,10 @@ class animal locomotion index = self.index if isinstance(index, MultiIndex): + level = range(len(key)) if isinstance(key, Sequence) else 0 loc, new_index = index.get_loc_level( key, - level=range(len(key)), + level=level, drop_level=drop_level ) if not drop_level: From c2f575c92f4dd1c705e71308c81ed4ac4aadaddc Mon Sep 17 00:00:00 2001 From: Stefan Song Date: Tue, 13 Aug 2024 08:37:36 -0400 Subject: [PATCH 4/4] fix warning --- pandas/core/generic.py | 4 ++-- pandas/tests/series/indexing/test_xs.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e84c28a77774a..de826c731d159 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2,6 +2,7 @@ from __future__ import annotations import collections 
+import collections.abc from copy import deepcopy import datetime as dt from functools import partial @@ -16,7 +17,6 @@ ClassVar, Literal, NoReturn, - Sequence, cast, final, overload, @@ -4127,7 +4127,7 @@ class animal locomotion index = self.index if isinstance(index, MultiIndex): - level = range(len(key)) if isinstance(key, Sequence) else 0 + level = range(len(key)) if isinstance(key, collections.abc.Sequence) else 0 loc, new_index = index.get_loc_level( key, level=level, diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index d853d9e7ff509..00a008a1a7fbb 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -84,7 +84,9 @@ def test_xs_key_as_list(self): def test_xs_default_level(self): # GH#59098 - df = DataFrame(dict(i=[1,2,3], j=[1,1,2], x=[10, 100, 1000])).set_index(["i", "j"]) + df = DataFrame( + {"i": [1,2,3], "j": [1,1,2], "x": [10, 100, 1000]} + ).set_index(["i", "j"]) key = (1, 1) # Both scenarios should return DataFrame