Skip to content

Commit ee141ca

Browse files
committed
Deprecate default observed=False for groupby
1 parent a313f7f commit ee141ca

File tree

5 files changed

+44
-5
lines changed

5 files changed

+44
-5
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6526,7 +6526,7 @@ def groupby(
65266526
sort: bool = True,
65276527
group_keys: bool = True,
65286528
squeeze: bool = no_default,
6529-
observed: bool = False,
6529+
observed: Optional[bool] = None,
65306530
dropna: bool = True,
65316531
) -> DataFrameGroupBy:
65326532
from pandas.core.groupby.generic import DataFrameGroupBy

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def __init__(
502502
sort: bool = True,
503503
group_keys: bool = True,
504504
squeeze: bool = False,
505-
observed: bool = False,
505+
observed: Optional[bool] = None,
506506
mutated: bool = False,
507507
dropna: bool = True,
508508
):
@@ -2959,7 +2959,7 @@ def get_groupby(
29592959
sort: bool = True,
29602960
group_keys: bool = True,
29612961
squeeze: bool = False,
2962-
observed: bool = False,
2962+
observed: Optional[bool] = None,
29632963
mutated: bool = False,
29642964
dropna: bool = True,
29652965
) -> GroupBy:

pandas/core/groupby/grouper.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Provide user facing operators for doing the split part of the
33
split-apply-combine paradigm.
44
"""
5+
import textwrap
56
from typing import Dict, Hashable, List, Optional, Set, Tuple
67
import warnings
78

@@ -31,6 +32,17 @@
3132

3233
from pandas.io.formats.printing import pprint_thing
3334

35+
_observed_msg = textwrap.dedent(
36+
"""\
37+
Using 'observed=False', because grouping on a categorical. A future version
38+
of pandas will change to 'observed=True'.
39+
40+
To silence the warning and switch to the future behavior, pass 'observed=True'.
41+
42+
To keep the current behavior and silence the warning, pass 'observed=False'.
43+
"""
44+
)
45+
3446

3547
class Grouper:
3648
"""
@@ -425,7 +437,7 @@ def __init__(
425437
name=None,
426438
level=None,
427439
sort: bool = True,
428-
observed: bool = False,
440+
observed: Optional[bool] = None,
429441
in_axis: bool = False,
430442
dropna: bool = True,
431443
):
@@ -488,6 +500,9 @@ def __init__(
488500
# a passed Categorical
489501
elif is_categorical_dtype(self.grouper):
490502

503+
if observed is None:
504+
warnings.warn(_observed_msg, FutureWarning, stacklevel=5)
505+
491506
self.grouper, self.all_grouper = recode_for_groupby(
492507
self.grouper, self.sort, observed
493508
)
@@ -622,7 +637,7 @@ def get_grouper(
622637
axis: int = 0,
623638
level=None,
624639
sort: bool = True,
625-
observed: bool = False,
640+
observed: Optional[bool] = None,
626641
mutated: bool = False,
627642
validate: bool = True,
628643
dropna: bool = True,

pandas/core/shared_docs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,15 @@
119119
This only applies if any of the groupers are Categoricals.
120120
If True: only show observed values for categorical groupers.
121121
If False: show all values for categorical groupers.
122+
123+
The current default of ``observed=False`` is deprecated and will
124+
change to ``observed=True`` in a future version of pandas.
125+
126+
Explicitly pass ``observed=True`` to silence the warning and
127+
show only observed values.
128+
Explicitly pass ``observed=False`` to silence the warning and
129+
show groups for all values, whether observed or not.
130+
122131
dropna : bool, default True
123132
If True, and if group keys contain NA values, NA values together
124133
with row/column will be dropped.

pandas/tests/groupby/test_grouping.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,21 @@ def test_groupby_multiindex_level_empty(self):
691691
)
692692
tm.assert_frame_equal(result, expected)
693693

694+
def test_default_observed_deprecated(self):
695+
df = pd.DataFrame([
696+
['A', 1, 1], ['A', 2, 1], ['B', 1, 1]
697+
], columns=['x', 'y', 'z'])
698+
df.x = df.x.astype('category')
699+
df.y = df.x.astype('category')
700+
701+
with tm.assert_produces_warning(expected_warning=FutureWarning):
702+
df.groupby(['x', 'y'])
703+
704+
with pytest.warns(None) as any_warnings:
705+
df.groupby(['x', 'y'], observed=True)
706+
df.groupby(['x', 'y'], observed=False)
707+
assert len(any_warnings) == 0
708+
694709

695710
# get_group
696711
# --------------------------------

0 commit comments

Comments
 (0)