From 16cb58eda571ce9fa01ebd0d2a3cb1cbafb693d7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 8 Mar 2020 19:50:15 +0000 Subject: [PATCH 01/15] first attempt --- pandas/core/groupby/generic.py | 5 +++++ pandas/tests/groupby/test_transform.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ac522fc7863b2..70ceb839a4e61 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1452,6 +1452,11 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # for each col, reshape to to size of original frame # by take operation ids, _, ngroup = self.grouper.group_info + + # Deal with categorical case + ids = np.array([result.index.get_loc(i) for i in self.grouper.result_index])[ + ids + ] output = [] for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 740103eec185a..07d888eed131b 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1196,3 +1196,19 @@ def test_transform_lambda_indexing(): ), ) tm.assert_frame_equal(result, expected) + + +def test_me(): + df = pd.DataFrame( + { + "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), + "B": [1, 2, 3], + "C": ["a", "b", "a"], + } + ) + result = df.groupby(["A", "C"]).transform("sum")["B"] + # breakpoint() + df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) + expected = df.groupby(["A", "C"]).transform("sum")["B"] + # breakpoint() + tm.assert_series_equal(result, expected) From fff989f6976b43da6178b3aaa2247d2a748777bf Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 8 Mar 2020 22:18:18 +0000 Subject: [PATCH 02/15] condition for categorical --- pandas/core/groupby/generic.py | 20 +++++++++++++++----- pandas/tests/groupby/test_transform.py | 4 +--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 70ceb839a4e61..1bdded9ceeeb9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -57,6 +57,7 @@ normalize_keyword_aggregation, ) import pandas.core.algorithms as algorithms +from pandas.core.arrays import Categorical from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -69,7 +70,12 @@ _transform_template, get_groupby, ) -from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, + all_indexes_same, +) import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series @@ -1453,10 +1459,14 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # by take operation ids, _, ngroup = self.grouper.group_info - # Deal with categorical case - ids = np.array([result.index.get_loc(i) for i in self.grouper.result_index])[ - ids - ] + if any( + isinstance(ping.grouper, (Categorical, CategoricalIndex)) + for ping in self.grouper.groupings + ): + ids = np.array( + [result.index.get_loc(i) for i in self.grouper.result_index] + )[ids] + output = [] for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 07d888eed131b..5422105619117 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1198,7 +1198,7 @@ def test_transform_lambda_indexing(): tm.assert_frame_equal(result, expected) -def test_me(): +def test_categorical_and_not_categorical_key(): df = pd.DataFrame( { "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), @@ -1207,8 +1207,6 @@ def test_me(): } ) result = df.groupby(["A", "C"]).transform("sum")["B"] - # breakpoint() df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) expected = df.groupby(["A", "C"]).transform("sum")["B"] - # breakpoint() tm.assert_series_equal(result, expected) From 1356da929a0e5d07ac50de63ac2b1ab7a56c1642 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 8 Mar 2020 22:19:11 +0000 Subject: [PATCH 03/15] gh number --- pandas/tests/groupby/test_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 5422105619117..b5ec73f8a9a19 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1199,6 +1199,7 @@ def test_transform_lambda_indexing(): def test_categorical_and_not_categorical_key(): + # GH 32494 df = pd.DataFrame( { "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), From 690591ba5a3a4a5e7cbbbcd68a3a406b987d4c0c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Mar 2020 12:28:09 +0000 Subject: [PATCH 04/15] use is_categorical --- pandas/core/groupby/generic.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1bdded9ceeeb9..e446c2d0591ce 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -42,6 +42,7 @@ ensure_int64, ensure_platform_int, is_bool, + is_categorical, is_integer_dtype, is_interval_dtype, is_numeric_dtype, @@ -57,7 +58,6 @@ normalize_keyword_aggregation, ) import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -70,12 +70,7 @@ _transform_template, get_groupby, ) -from pandas.core.indexes.api import ( - CategoricalIndex, - Index, - MultiIndex, - all_indexes_same, -) +from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series @@ -1459,10 +1454,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # by take operation ids, _, ngroup = self.grouper.group_info - if any( - isinstance(ping.grouper, (Categorical, CategoricalIndex)) - for ping in self.grouper.groupings - ): + if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): ids = np.array( [result.index.get_loc(i) for i in self.grouper.result_index] )[ids] From fa0cb85172a4bba40a51654bec9761b219730752 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Mar 2020 12:29:32 +0000 Subject: [PATCH 05/15] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e745bf3f5feed..1dc45cbdc4144 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -317,6 +317,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) +- Bug in :meth:`Groupby.transform` was returning the wrong result when grouping by multiple keys of which some were categorical and others not (:issue:`32494`) Reshaping ^^^^^^^^^ From 9d4d86ce972e9ad73aa76f59bbeed8aaaaecebc4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Mar 2020 21:46:39 +0000 Subject: [PATCH 06/15] reindex result --- pandas/core/groupby/generic.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0c8f2abb78eef..2456439d6d38c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -42,7 +42,6 @@ ensure_int64, ensure_platform_int, is_bool, - is_categorical, is_integer_dtype, is_interval_dtype, is_numeric_dtype, @@ -1456,10 +1455,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # by take operation ids, _, ngroup = self.grouper.group_info - if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): - ids = np.array( - [result.index.get_loc(i) for i in self.grouper.result_index] - )[ids] + result = result.reindex(self.grouper.result_index) output = [] for i, _ in enumerate(result.columns): From 69c95131904d4e44170e0563b6b2208edfba53e6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Mar 2020 21:48:07 +0000 Subject: [PATCH 07/15] remove blank lines --- pandas/core/groupby/generic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2456439d6d38c..3ddd9da0f2c97 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1454,9 +1454,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # for each col, reshape to to size of original frame # by take operation ids, _, ngroup = self.grouper.group_info - result = result.reindex(self.grouper.result_index) - output = [] for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) From c9d9f81a17dc2c7632278cea04ad8b9e1f75e41b Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Mar 2020 21:19:47 +0000 Subject: [PATCH 08/15] fix for series case too --- pandas/core/groupby/generic.py | 1 + pandas/tests/groupby/test_transform.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3ddd9da0f2c97..4d4c3dba1d56b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -523,6 +523,7 @@ def _transform_fast(self, result, func_nm: str) -> Series: builtin/cythonizable functions """ ids, _, ngroup = self.grouper.group_info + result = result.reindex(self.grouper.result_index) cast = self._transform_should_cast(func_nm) out = algorithms.take_1d(result._values, ids) if cast: diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index b98dcdf3e83f1..323c091d461b1 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1209,7 +1209,14 @@ def test_categorical_and_not_categorical_key(): "C": ["a", "b", "a"], } ) - result = df.groupby(["A", "C"]).transform("sum")["B"] + # DataFrame case + result = df.groupby(["A", "C"]).transform("sum") df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) - expected = df.groupby(["A", "C"]).transform("sum")["B"] + expected = df.groupby(["A", "C"]).transform("sum") + tm.assert_frame_equal(result, expected) + + # Series case + result = df.groupby(["A", "C"])["B"].transform("sum") + df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) + expected = df.groupby(["A", "C"])["B"].transform("sum") tm.assert_series_equal(result, expected) From 5586631f624ce7b2baacb837210abf605ff2ee56 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Mar 2020 21:31:05 +0000 Subject: [PATCH 09/15] correct test --- pandas/tests/groupby/test_transform.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 323c091d461b1..ab042e1b66dde 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1202,21 +1202,23 @@ def test_transform_lambda_indexing(): def test_categorical_and_not_categorical_key(): # GH 32494 - df = pd.DataFrame( + df_with_categorical = pd.DataFrame( { "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), "B": [1, 2, 3], "C": ["a", "b", "a"], } ) + df_without_categorical = pd.DataFrame( + {"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]} + ) + # DataFrame case - result = df.groupby(["A", "C"]).transform("sum") - df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) - expected = df.groupby(["A", "C"]).transform("sum") + result = df_with_categorical.groupby(["A", "C"]).transform("sum") + expected = df_without_categorical.groupby(["A", "C"]).transform("sum") tm.assert_frame_equal(result, expected) # Series case - result = df.groupby(["A", "C"])["B"].transform("sum") - df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]}) - expected = df.groupby(["A", "C"])["B"].transform("sum") + result = df_with_categorical.groupby(["A", "C"])["B"].transform("sum") + expected = df_without_categorical.groupby(["A", "C"])["B"].transform("sum") tm.assert_series_equal(result, expected) From fc66150f8407b7c84174adfccd5b1e6704e730b6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 25 Mar 2020 15:21:40 +0000 Subject: [PATCH 10/15] add comment about unobserved categories in categorical case --- pandas/core/groupby/generic.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4d4c3dba1d56b..a486f3c98aea1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -42,6 +42,7 @@ ensure_int64, ensure_platform_int, is_bool, + is_categorical, is_integer_dtype, is_interval_dtype, is_numeric_dtype, @@ -523,7 +524,11 @@ def _transform_fast(self, result, func_nm: str) -> Series: builtin/cythonizable functions """ ids, _, ngroup = self.grouper.group_info - result = result.reindex(self.grouper.result_index) + + # in categorical case there may be unobserved categories in index + if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): + result = result.reindex(self.grouper.result_index) + cast = self._transform_should_cast(func_nm) out = algorithms.take_1d(result._values, ids) if cast: @@ -1455,7 +1460,11 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # for each col, reshape to to size of original frame # by take operation ids, _, ngroup = self.grouper.group_info - result = result.reindex(self.grouper.result_index) + + # in categorical case there may be unobserved categories in index + if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): + result = result.reindex(self.grouper.result_index) + output = [] for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) From b9cdde9515b7c08c33a629cb396f442d691cc94a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 9 May 2020 19:58:26 +0100 Subject: [PATCH 11/15] is_categorical -> is_categorical_dtype --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index adf7371976422..53aa6d81d4290 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,7 @@ ensure_int64, ensure_platform_int, is_bool, - is_categorical, + is_categorical_dtype, is_integer_dtype, is_interval_dtype, is_numeric_dtype, @@ -542,7 +542,7 @@ def _transform_fast(self, result, func_nm: str) -> Series: ids, _, ngroup = self.grouper.group_info # in categorical case there may be unobserved categories in index - if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): + if any(is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings): result = result.reindex(self.grouper.result_index) cast = self._transform_should_cast(func_nm) @@ -1483,7 +1483,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: ids, _, ngroup = self.grouper.group_info # in categorical case there may be unobserved categories in index - if any(is_categorical(ping.grouper) for ping in self.grouper.groupings): + if any(is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings): result = result.reindex(self.grouper.result_index) output = [] From 016f5fa59e3299052d1142f1ef615a13fafa984f Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 10 May 2020 11:19:03 +0100 Subject: [PATCH 12/15] dont reindex if observed is True, add short description of test, parametrize over observed --- pandas/core/groupby/generic.py | 8 ++++++-- pandas/tests/groupby/transform/test_transform.py | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 53aa6d81d4290..9925372cb2aad 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -542,7 +542,9 @@ def _transform_fast(self, result, func_nm: str) -> Series: ids, _, ngroup = self.grouper.group_info # in categorical case there may be unobserved categories in index - if any(is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings): + if not self.observed and any( + is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings + ): result = result.reindex(self.grouper.result_index) cast = self._transform_should_cast(func_nm) @@ -1483,7 +1485,9 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: ids, _, ngroup = self.grouper.group_info # in categorical case there may be unobserved categories in index - if any(is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings): + if not self.observed and any( + is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings + ): result = result.reindex(self.grouper.result_index) output = [] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 24a0e7acad632..fe9d0cb205ce9 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1197,7 +1197,11 @@ def test_transform_lambda_indexing(): tm.assert_frame_equal(result, expected) -def test_categorical_and_not_categorical_key(): +@pytest.mark.parametrize("observed", [True, False]) +def test_categorical_and_not_categorical_key(observed): + # Checks that groupby-transform, when grouping by both a categorical + # and a non-categorical key, doesn't try to expand the output to include + # non-observed categories but instead matches the input shape. # GH 32494 df_with_categorical = pd.DataFrame( { @@ -1211,11 +1215,17 @@ def test_categorical_and_not_categorical_key(): ) # DataFrame case - result = df_with_categorical.groupby(["A", "C"]).transform("sum") + result = df_with_categorical.groupby(["A", "C"], observed=observed).transform("sum") expected = df_without_categorical.groupby(["A", "C"]).transform("sum") tm.assert_frame_equal(result, expected) + expected_explicit = pd.DataFrame({"B": [4, 2, 4]}) + tm.assert_frame_equal(result, expected_explicit) # Series case - result = df_with_categorical.groupby(["A", "C"])["B"].transform("sum") + result = df_with_categorical.groupby(["A", "C"], observed=observed)["B"].transform( + "sum" + ) expected = df_without_categorical.groupby(["A", "C"])["B"].transform("sum") tm.assert_series_equal(result, expected) + expected_explicit = pd.Series([4, 2, 4], name="B") + tm.assert_frame_equal(result, expected_explicit) From 869e1f557e6b92b8ab1d10c78b301e376ef8b0e6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 10 May 2020 11:38:44 +0100 Subject: [PATCH 13/15] assert frame equal -> assert series equal --- pandas/tests/groupby/transform/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index fe9d0cb205ce9..d1a562be9644d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1228,4 +1228,4 @@ def test_categorical_and_not_categorical_key(observed): expected = df_without_categorical.groupby(["A", "C"])["B"].transform("sum") tm.assert_series_equal(result, expected) expected_explicit = pd.Series([4, 2, 4], name="B") - tm.assert_frame_equal(result, expected_explicit) + tm.assert_series_equal(result, expected_explicit) From c3db9a76c6e7ac87fe208dd21e43b5fcc97392fc Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 May 2020 07:56:09 +0100 Subject: [PATCH 14/15] don't special case the reindexing --- pandas/core/groupby/generic.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9925372cb2aad..b7c04358ad77c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,6 @@ ensure_int64, ensure_platform_int, is_bool, - is_categorical_dtype, is_integer_dtype, is_interval_dtype, is_numeric_dtype, @@ -540,13 +539,7 @@ def _transform_fast(self, result, func_nm: str) -> Series: builtin/cythonizable functions """ ids, _, ngroup = self.grouper.group_info - - # in categorical case there may be unobserved categories in index - if not self.observed and any( - is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings - ): - result = result.reindex(self.grouper.result_index) - + result = result.reindex(self.grouper.result_index, copy=False) cast = self._transform_should_cast(func_nm) out = algorithms.take_1d(result._values, ids) if cast: @@ -1483,13 +1476,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: # for each col, reshape to to size of original frame # by take operation ids, _, ngroup = self.grouper.group_info - - # in categorical case there may be unobserved categories in index - if not self.observed and any( - is_categorical_dtype(ping.grouper) for ping in self.grouper.groupings - ): - result = result.reindex(self.grouper.result_index) - + result = result.reindex(self.grouper.result_index, copy=False) output = [] for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) From c9b98810edf38792dad326b6d837c721462b32df Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 12 Jun 2020 19:03:47 +0100 Subject: [PATCH 15/15] use observed fixture --- pandas/tests/groupby/transform/test_transform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index d1a562be9644d..461562a5ca04f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1197,7 +1197,6 @@ def test_transform_lambda_indexing(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("observed", [True, False]) def test_categorical_and_not_categorical_key(observed): # Checks that groupby-transform, when grouping by both a categorical # and a non-categorical key, doesn't try to expand the output to include