From e3c54e2e8e213799f22672376da47d9b8ea34195 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 6 Nov 2022 09:17:41 -0500 Subject: [PATCH 1/4] MultiIndex.value_counts to retain index type --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/algorithms.py | 5 +++++ pandas/tests/test_algos.py | 10 ++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c76555f9ef417..6b12220a606db 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -556,6 +556,7 @@ MultiIndex - Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`) - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`) +- Bug in :meth:`MultiIndex.value_counts` returning :class:`Series` indexed by flat index of tuples (:issue:`#####`) - I/O diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aca5c4345d247..a898fa510e089 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -950,6 +950,11 @@ def value_counts( result.name = name counts = result._values + elif isinstance(values, ABCMultiIndex): + levels = list(range(values.nlevels)) + result = Series(index=values).groupby(level=levels, dropna=dropna).size() + counts = result._values + else: values = _ensure_arraylike(values) keys, counts = value_counts_arraylike(values, dropna) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index a6b765117f616..112cbf82129fc 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1350,6 +1350,16 @@ def test_value_counts_uint64(self): tm.assert_series_equal(result, expected) + def test_value_counts_multiindex(self): + # GH# (TODO) + mi = MultiIndex.from_tuples( + tuples=[(1, "a"), (2, "b"), (3, "c"), (2, "b")], + names=["foo", "bar"], + ) + result = algos.value_counts(mi) + expected = Series(data=[2, 1, 1], index=mi.take([1, 0, 2])) + tm.assert_series_equal(result, expected) + class TestDuplicated: def test_duplicated_with_nas(self): From 2cafcc63cd132c3b33d2956129906c61194ed0b6 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 6 Nov 2022 09:24:20 -0500 Subject: [PATCH 2/4] gh refs --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/algorithms.py | 1 + pandas/tests/test_algos.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6b12220a606db..a71a8cff4e841 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -556,7 +556,7 @@ MultiIndex - Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`) - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`) -- Bug in :meth:`MultiIndex.value_counts` returning :class:`Series` indexed by flat index of tuples (:issue:`#####`) +- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples (:issue:`49558`) - I/O diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a898fa510e089..a91fd3e2bab68 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -951,6 +951,7 @@ def value_counts( counts = result._values elif isinstance(values, ABCMultiIndex): + # GH49558 levels = list(range(values.nlevels)) result = Series(index=values).groupby(level=levels, dropna=dropna).size() counts = result._values diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 112cbf82129fc..845bdad8f2d3d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1351,7 +1351,7 @@ def test_value_counts_uint64(self): tm.assert_series_equal(result, expected) def test_value_counts_multiindex(self): - # GH# (TODO) + # GH49558 mi = MultiIndex.from_tuples( tuples=[(1, "a"), (2, "b"), (3, "c"), (2, "b")], names=["foo", "bar"], From 8de8c903805668511ffd58b0c370ec3d4f7f0e88 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 6 Nov 2022 18:14:11 -0500 Subject: [PATCH 3/4] update test --- pandas/core/algorithms.py | 2 ++ pandas/tests/base/test_value_counts.py | 2 -- pandas/tests/test_algos.py | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a91fd3e2bab68..6b4fd6f48e2fd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -954,6 +954,8 @@ def value_counts( # GH49558 levels = list(range(values.nlevels)) result = Series(index=values).groupby(level=levels, dropna=dropna).size() + # TODO: allow index names to remain (see discussion in GH49497) + result.index.names = [None] * values.nlevels counts = result._values else: diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index f6ad3f24434d3..dafbd9fee1b8e 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -29,8 +29,6 @@ def test_value_counts(index_or_series_obj): counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) expected.index = expected.index.astype(obj.dtype) - if isinstance(obj, pd.MultiIndex): - expected.index = Index(expected.index) if not isinstance(result.dtype, np.dtype): # i.e IntegerDtype diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 845bdad8f2d3d..a6b765117f616 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1350,16 +1350,6 @@ def test_value_counts_uint64(self): tm.assert_series_equal(result, expected) - def test_value_counts_multiindex(self): - # GH49558 - mi = MultiIndex.from_tuples( - tuples=[(1, "a"), (2, "b"), (3, "c"), (2, "b")], - names=["foo", "bar"], - ) - result = algos.value_counts(mi) - expected = Series(data=[2, 1, 1], index=mi.take([1, 0, 2])) - tm.assert_series_equal(result, expected) - class TestDuplicated: def test_duplicated_with_nas(self): From e586a30659e3aafb911076af146825b407101312 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 7 Nov 2022 19:21:11 -0500 Subject: [PATCH 4/4] update whatsnew --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c3d97c3abe146..5f7d2af92e839 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -568,7 +568,7 @@ MultiIndex - Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`) - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`) -- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples (:issue:`49558`) +- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`) - I/O