Skip to content

Commit 4caa378

Browse files
committed
Deprecate default observed=False for groupby
1 parent 094c7ca commit 4caa378

File tree

5 files changed

+43
-5
lines changed

5 files changed

+43
-5
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6528,7 +6528,7 @@ def groupby(
65286528
sort: bool = True,
65296529
group_keys: bool = True,
65306530
squeeze: bool = no_default,
6531-
observed: bool = False,
6531+
observed: Optional[bool] = None,
65326532
dropna: bool = True,
65336533
) -> "DataFrameGroupBy":
65346534
from pandas.core.groupby.generic import DataFrameGroupBy

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def __init__(
496496
sort: bool = True,
497497
group_keys: bool = True,
498498
squeeze: bool = False,
499-
observed: bool = False,
499+
observed: Optional[bool] = None,
500500
mutated: bool = False,
501501
dropna: bool = True,
502502
):
@@ -2912,7 +2912,7 @@ def get_groupby(
29122912
sort: bool = True,
29132913
group_keys: bool = True,
29142914
squeeze: bool = False,
2915-
observed: bool = False,
2915+
observed: Optional[bool] = None,
29162916
mutated: bool = False,
29172917
dropna: bool = True,
29182918
) -> GroupBy:

pandas/core/groupby/grouper.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Provide user facing operators for doing the split part of the
33
split-apply-combine paradigm.
44
"""
5+
import textwrap
56
from typing import Dict, Hashable, List, Optional, Tuple
67
import warnings
78

@@ -31,6 +32,17 @@
3132

3233
from pandas.io.formats.printing import pprint_thing
3334

35+
_observed_msg = textwrap.dedent(
36+
"""\
37+
Using 'observed=False', because grouping on a categorical. A future version
38+
of pandas will change to 'observed=True'.
39+
40+
To silence the warning and switch to the future behavior, pass 'observed=True'.
41+
42+
To keep the current behavior and silence the warning, pass 'observed=False'.
43+
"""
44+
)
45+
3446

3547
class Grouper:
3648
"""
@@ -418,7 +430,7 @@ def __init__(
418430
name=None,
419431
level=None,
420432
sort: bool = True,
421-
observed: bool = False,
433+
observed: Optional[bool] = None,
422434
in_axis: bool = False,
423435
dropna: bool = True,
424436
):
@@ -481,6 +493,9 @@ def __init__(
481493
# a passed Categorical
482494
elif is_categorical_dtype(self.grouper):
483495

496+
if observed is None:
497+
warnings.warn(_observed_msg, FutureWarning, stacklevel=5)
498+
484499
self.grouper, self.all_grouper = recode_for_groupby(
485500
self.grouper, self.sort, observed
486501
)
@@ -605,7 +620,7 @@ def get_grouper(
605620
axis: int = 0,
606621
level=None,
607622
sort: bool = True,
608-
observed: bool = False,
623+
observed: Optional[bool] = None,
609624
mutated: bool = False,
610625
validate: bool = True,
611626
dropna: bool = True,

pandas/core/shared_docs.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,14 @@
120120
If True: only show observed values for categorical groupers.
121121
If False: show all values for categorical groupers.
122122
123+
The current default of ``observed=False`` is deprecated and will
124+
change to ``observed=True`` in a future version of pandas.
125+
126+
Explicitly pass ``observed=True`` to silence the warning and not
127+
show groups for unobserved values.
128+
Explicitly pass ``observed=False`` to silence the warning and
129+
show groups for all values, including unobserved ones.
130+
123131
.. versionadded:: 0.23.0
124132
dropna : bool, default True
125133
If True, and if group keys contain NA values, NA values together

pandas/tests/groupby/test_grouping.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,21 @@ def test_groupby_multiindex_level_empty(self):
691691
)
692692
tm.assert_frame_equal(result, expected)
693693

694+
def test_default_observed_deprecated(self):
695+
df = pd.DataFrame([
696+
['A', 1, 1], ['A', 2, 1], ['B', 1, 1]
697+
], columns=['x', 'y', 'z'])
698+
df.x = df.x.astype('category')
699+
df.y = df.x.astype('category')
700+
701+
with tm.assert_produces_warning(expected_warning=FutureWarning):
702+
df.groupby(['x', 'y'])
703+
704+
with pytest.warns(None) as any_warnings:
705+
df.groupby(['x', 'y'], observed=True)
706+
df.groupby(['x', 'y'], observed=False)
707+
assert len(any_warnings) == 0
708+
694709

695710
# get_group
696711
# --------------------------------

0 commit comments

Comments
 (0)