Skip to content

Commit 6b76969

Browse files
committed
Deprecate default observed=False for groupby
1 parent 450384e commit 6b76969

File tree

5 files changed

+44
-5
lines changed

5 files changed

+44
-5
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6698,7 +6698,7 @@ def groupby(
66986698
sort: bool = True,
66996699
group_keys: bool = True,
67006700
squeeze: bool = no_default,
6701-
observed: bool = False,
6701+
observed: Optional[bool] = None,
67026702
dropna: bool = True,
67036703
) -> DataFrameGroupBy:
67046704
from pandas.core.groupby.generic import DataFrameGroupBy

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ def __init__(
526526
sort: bool = True,
527527
group_keys: bool = True,
528528
squeeze: bool = False,
529-
observed: bool = False,
529+
observed: Optional[bool] = None,
530530
mutated: bool = False,
531531
dropna: bool = True,
532532
):
@@ -3009,7 +3009,7 @@ def get_groupby(
30093009
sort: bool = True,
30103010
group_keys: bool = True,
30113011
squeeze: bool = False,
3012-
observed: bool = False,
3012+
observed: Optional[bool] = None,
30133013
mutated: bool = False,
30143014
dropna: bool = True,
30153015
) -> GroupBy:

pandas/core/groupby/grouper.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Provide user facing operators for doing the split part of the
33
split-apply-combine paradigm.
44
"""
5+
import textwrap
56
from typing import Dict, Hashable, List, Optional, Set, Tuple
67
import warnings
78

@@ -31,6 +32,17 @@
3132

3233
from pandas.io.formats.printing import pprint_thing
3334

35+
_observed_msg = textwrap.dedent(
36+
"""\
37+
Using 'observed=False', because grouping on a categorical. A future version
38+
of pandas will change to 'observed=True'.
39+
40+
To silence the warning and switch to the future behavior, pass 'observed=True'.
41+
42+
To keep the current behavior and silence the warning, pass 'observed=False'.
43+
"""
44+
)
45+
3446

3547
class Grouper:
3648
"""
@@ -432,7 +444,7 @@ def __init__(
432444
name=None,
433445
level=None,
434446
sort: bool = True,
435-
observed: bool = False,
447+
observed: Optional[bool] = None,
436448
in_axis: bool = False,
437449
dropna: bool = True,
438450
):
@@ -495,6 +507,9 @@ def __init__(
495507
# a passed Categorical
496508
elif is_categorical_dtype(self.grouper):
497509

510+
if observed is None:
511+
warnings.warn(_observed_msg, FutureWarning, stacklevel=5)
512+
498513
self.grouper, self.all_grouper = recode_for_groupby(
499514
self.grouper, self.sort, observed
500515
)
@@ -631,7 +646,7 @@ def get_grouper(
631646
axis: int = 0,
632647
level=None,
633648
sort: bool = True,
634-
observed: bool = False,
649+
observed: Optional[bool] = None,
635650
mutated: bool = False,
636651
validate: bool = True,
637652
dropna: bool = True,

pandas/core/shared_docs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,15 @@
119119
This only applies if any of the groupers are Categoricals.
120120
If True: only show observed values for categorical groupers.
121121
If False: show all values for categorical groupers.
122+
123+
The current default of ``observed=False`` is deprecated and will
124+
change to ``observed=True`` in a future version of pandas.
125+
126+
Explicitly pass ``observed=True`` to silence the warning and
127+
show only observed values.
128+
Explicitly pass ``observed=False`` to silence the warning and
129+
show groups for all categories, including unobserved ones.
130+
122131
dropna : bool, default True
123132
If True, and if group keys contain NA values, NA values together
124133
with row/column will be dropped.

pandas/tests/groupby/test_grouping.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,21 @@ def test_groupby_multiindex_level_empty(self):
703703
)
704704
tm.assert_frame_equal(result, expected)
705705

706+
def test_default_observed_deprecated(self):
707+
df = pd.DataFrame([
708+
['A', 1, 1], ['A', 2, 1], ['B', 1, 1]
709+
], columns=['x', 'y', 'z'])
710+
df.x = df.x.astype('category')
711+
df.y = df.x.astype('category')
712+
713+
with tm.assert_produces_warning(expected_warning=FutureWarning):
714+
df.groupby(['x', 'y'])
715+
716+
with pytest.warns(None) as any_warnings:
717+
df.groupby(['x', 'y'], observed=True)
718+
df.groupby(['x', 'y'], observed=False)
719+
assert len(any_warnings) == 0
720+
706721

707722
# get_group
708723
# --------------------------------

0 commit comments

Comments
 (0)