Skip to content

Commit df44731

Browse files
committed
Deprecate default observed=False for groupby
1 parent cb76a6d commit df44731

File tree

5 files changed

+43
-5
lines changed

5 files changed

+43
-5
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6524,7 +6524,7 @@ def groupby(
65246524
sort: bool = True,
65256525
group_keys: bool = True,
65266526
squeeze: bool = no_default,
6527-
observed: bool = False,
6527+
observed: Optional[bool] = None,
65286528
dropna: bool = True,
65296529
) -> "DataFrameGroupBy":
65306530
from pandas.core.groupby.generic import DataFrameGroupBy

pandas/core/generic.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7495,6 +7495,14 @@ def clip(
74957495
If True: only show observed values for categorical groupers.
74967496
If False: show all values for categorical groupers.
74977497
7498+
The current default of ``observed=False`` is deprecated and will
7499+
change to ``observed=True`` in a future version of pandas.
7500+
7501+
Explicitly pass ``observed=True`` to silence the warning and only
7502+
show groups for observed values.
7503+
Explicitly pass ``observed=False`` to silence the warning and
7504+
show groups for all values, including unobserved ones.
7505+
74987506
.. versionadded:: 0.23.0
74997507
dropna : bool, default True
75007508
If True, and if group keys contain NA values, NA values together

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def __init__(
496496
sort: bool = True,
497497
group_keys: bool = True,
498498
squeeze: bool = False,
499-
observed: bool = False,
499+
observed: Optional[bool] = None,
500500
mutated: bool = False,
501501
dropna: bool = True,
502502
):
@@ -2912,7 +2912,7 @@ def get_groupby(
29122912
sort: bool = True,
29132913
group_keys: bool = True,
29142914
squeeze: bool = False,
2915-
observed: bool = False,
2915+
observed: Optional[bool] = None,
29162916
mutated: bool = False,
29172917
dropna: bool = True,
29182918
) -> GroupBy:

pandas/core/groupby/grouper.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Provide user facing operators for doing the split part of the
33
split-apply-combine paradigm.
44
"""
5+
import textwrap
56
from typing import Dict, Hashable, List, Optional, Tuple
67
import warnings
78

@@ -31,6 +32,17 @@
3132

3233
from pandas.io.formats.printing import pprint_thing
3334

35+
_observed_msg = textwrap.dedent(
36+
"""\
37+
Using 'observed=False', because grouping on a categorical. A future version
38+
of pandas will change to 'observed=True'.
39+
40+
To silence the warning and switch to the future behavior, pass 'observed=True'.
41+
42+
To keep the current behavior and silence the warning, pass 'observed=False'.
43+
"""
44+
)
45+
3446

3547
class Grouper:
3648
"""
@@ -418,7 +430,7 @@ def __init__(
418430
name=None,
419431
level=None,
420432
sort: bool = True,
421-
observed: bool = False,
433+
observed: Optional[bool] = None,
422434
in_axis: bool = False,
423435
dropna: bool = True,
424436
):
@@ -481,6 +493,9 @@ def __init__(
481493
# a passed Categorical
482494
elif is_categorical_dtype(self.grouper):
483495

496+
if observed is None:
497+
warnings.warn(_observed_msg, FutureWarning, stacklevel=5)
498+
484499
self.grouper, self.all_grouper = recode_for_groupby(
485500
self.grouper, self.sort, observed
486501
)
@@ -603,7 +618,7 @@ def get_grouper(
603618
axis: int = 0,
604619
level=None,
605620
sort: bool = True,
606-
observed: bool = False,
621+
observed: Optional[bool] = None,
607622
mutated: bool = False,
608623
validate: bool = True,
609624
dropna: bool = True,

pandas/tests/groupby/test_grouping.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,21 @@ def test_groupby_multiindex_level_empty(self):
691691
)
692692
tm.assert_frame_equal(result, expected)
693693

694+
def test_default_observed_deprecated(self):
695+
df = pd.DataFrame([
696+
['A', 1, 1], ['A', 2, 1], ['B', 1, 1]
697+
], columns=['x', 'y', 'z'])
698+
df.x = df.x.astype('category')
699+
df.y = df.x.astype('category')
700+
701+
with tm.assert_produces_warning(expected_warning=FutureWarning):
702+
df.groupby(['x', 'y'])
703+
704+
with pytest.warns(None) as any_warnings:
705+
df.groupby(['x', 'y'], observed=True)
706+
df.groupby(['x', 'y'], observed=False)
707+
assert len(any_warnings) == 0
708+
694709

695710
# get_group
696711
# --------------------------------

0 commit comments

Comments
 (0)