diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 9fb039de8f73a..08ea347708b8f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -412,6 +412,7 @@ Groupby/resample/rolling the function operated on the whole index rather than each element of the index. (:issue:`51979`) - Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`) - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) +- Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c32c96077bde7..5928c32e22b7f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -70,7 +70,10 @@ class providing the base-class of operations. ) from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.cast import ensure_dtype_can_hold_na +from pandas.core.dtypes.cast import ( + coerce_indexer_dtype, + ensure_dtype_can_hold_na, +) from pandas.core.dtypes.common import ( is_bool_dtype, is_float_dtype, @@ -4309,13 +4312,19 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde MultiIndex """ nqs = len(qs) + lev_codes, lev = Index(qs).factorize() + lev_codes = coerce_indexer_dtype(lev_codes, lev) if idx._is_multi: idx = cast(MultiIndex, idx) - lev_codes, lev = Index(qs).factorize() levels = list(idx.levels) + [lev] codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) else: - mi = MultiIndex.from_product([idx, qs]) + nidx = len(idx) + idx_codes = coerce_indexer_dtype(np.arange(nidx), idx) + levels = [idx, lev] + codes = [np.repeat(idx_codes, nqs), np.tile(lev_codes, nidx)] + mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None]) + return mi diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 5801223c0e718..7bf168944a1ac 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -471,3 +471,33 @@ def test_groupby_quantile_dt64tz_period(): expected.index = expected.index.astype(np.int_) tm.assert_frame_equal(result, expected) + + +def test_groupby_quantile_nonmulti_levels_order(): + # Non-regression test for GH #53009 + ind = pd.MultiIndex.from_tuples( + [ + (0, "a", "B"), + (0, "a", "A"), + (0, "b", "B"), + (0, "b", "A"), + (1, "a", "B"), + (1, "a", "A"), + (1, "b", "B"), + (1, "b", "A"), + ], + names=["sample", "cat0", "cat1"], + ) + ser = pd.Series(range(8), index=ind) + result = ser.groupby(level="cat1", sort=False).quantile([0.2, 0.8]) + + qind = pd.MultiIndex.from_tuples( + [("B", 0.2), ("B", 0.8), ("A", 0.2), ("A", 0.8)], names=["cat1", None] + ) + expected = pd.Series([1.2, 4.8, 2.2, 5.8], index=qind) + + tm.assert_series_equal(result, expected) + + # We need to check that index levels are not sorted + expected_levels = pd.core.indexes.frozen.FrozenList([["B", "A"], [0.2, 0.8]]) + tm.assert_equal(result.index.levels, expected_levels)