From e6c19e17e620a010d46b3887ab21d21b07b653c2 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 9 Nov 2019 14:43:35 +0000 Subject: [PATCH 1/3] API: Remove kwargs from GroupBy --- pandas/core/generic.py | 10 ++++------ pandas/core/groupby/groupby.py | 18 +++++++----------- pandas/tests/window/test_grouper.py | 2 +- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2468c43337d0d..8b86f38bde412 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7830,7 +7830,7 @@ def groupby( group_keys=True, squeeze=False, observed=False, - **kwargs + mutated=False, ): """ Group DataFrame or Series using a mapper or by a Series of columns. @@ -7875,10 +7875,8 @@ def groupby( If False: show all values for categorical groupers. .. versionadded:: 0.23.0 - - **kwargs - Optional, only accepts keyword argument 'mutated' and is passed - to groupby. + mutated : bool, False + `mutated` is passed to groupby. Returns ------- @@ -7956,7 +7954,7 @@ def groupby( group_keys=group_keys, squeeze=squeeze, observed=observed, - **kwargs + mutated=mutated, ) def asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fd45d60b02277..93a731d00b475 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -26,7 +26,6 @@ class providing the base-class of operations. from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly -from pandas.util._validators import validate_kwargs from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -349,12 +348,12 @@ def __init__( grouper=None, exclusions=None, selection=None, - as_index=True, - sort=True, - group_keys=True, - squeeze=False, - observed=False, - **kwargs + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, ): self._selection = selection @@ -376,7 +375,7 @@ def __init__( self.group_keys = group_keys self.squeeze = squeeze self.observed = observed - self.mutated = kwargs.pop("mutated", False) + self.mutated = mutated if grouper is None: from pandas.core.groupby.grouper import get_grouper @@ -396,9 +395,6 @@ def __init__( self.grouper = grouper self.exclusions = set(exclusions) if exclusions else set() - # we accept no other args - validate_kwargs("group", kwargs, {}) - def __len__(self) -> int: return len(self.groups) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index b726bd3e3c8a7..b65551bcbcd5f 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -13,7 +13,7 @@ def setup_method(self, method): def test_mutated(self): - msg = r"group\(\) got an unexpected keyword argument 'foo'" + msg = r"groupby\(\) got an unexpected keyword argument 'foo'" with pytest.raises(TypeError, match=msg): self.frame.groupby("A", foo=1) From 46d2245ceb0ffa099863a992ff9bf2e9fc84e9cc Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 11 Nov 2019 22:09:13 +0000 Subject: [PATCH 2/3] Deprecate mutated --- doc/source/whatsnew/v1.0.0.rst | 3 +- pandas/core/generic.py | 16 ++++++++--- pandas/core/groupby/generic.py | 4 +-- pandas/core/groupby/groupby.py | 42 ++++++++++++++++++++++------ pandas/core/resample.py | 6 ++-- pandas/tests/groupby/test_groupby.py | 7 +++++ pandas/tests/window/test_grouper.py | 7 +++-- 7 files changed, 63 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cd012fe755337..ed869936815e6 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -215,7 +215,8 @@ Deprecations value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - +- :meth:`DataFrame.groupby` and :meth:`Series.groupby` accepted a ``mutated`` parameter, which is actually + an implementation detail. It has been deprecated in the public interface (:issue:`29511`). .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8b86f38bde412..539bb00eea542 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7875,8 +7875,10 @@ def groupby( If False: show all values for categorical groupers. .. versionadded:: 0.23.0 - mutated : bool, False - `mutated` is passed to groupby. + mutated : bool, default False + .. deprecated:: 1.0 + This is an internal parameter and will be removed from the public + interface in the future Returns ------- @@ -7939,12 +7941,18 @@ def groupby( Captive 210.0 Wild 185.0 """ - from pandas.core.groupby.groupby import groupby + from pandas.core.groupby.groupby import get_groupby if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) - return groupby( + + if mutated is not False: + warnings.warn( + "Parameter 'mutated' is deprecated", FutureWarning, stacklevel=2 + ) + + return get_groupby( self, by=by, axis=axis, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 56a8a7d15077b..f0b42ec1156f6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -62,7 +62,7 @@ GroupBy, _apply_docs, _transform_template, - groupby, + get_groupby, ) from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase @@ -997,7 +997,7 @@ def _cython_agg_blocks( # reductions; see GH#28949 obj = obj.iloc[:, 0] - s = groupby(obj, self.grouper) + s = get_groupby(obj, self.grouper) try: result = s.aggregate(lambda x: alt(x, axis=self.axis)) except TypeError: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 93a731d00b475..1bd2a1e57d223 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2486,18 +2486,42 @@ def _reindex_output(self, output): @Appender(GroupBy.__doc__) -def groupby(obj: NDFrame, by, **kwds): - if isinstance(obj, Series): - from pandas.core.groupby.generic import SeriesGroupBy +def get_groupby( + obj: NDFrame, + by=None, + axis: int = 0, + level=None, + grouper=None, + exclusions=None, + selection=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, +): + from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy - klass = ( - SeriesGroupBy - ) # type: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]] + if isinstance(obj, Series): + klass = SeriesGroupBy elif isinstance(obj, DataFrame): - from pandas.core.groupby.generic import DataFrameGroupBy - klass = DataFrameGroupBy else: raise TypeError("invalid type: {obj}".format(obj=obj)) - return klass(obj, by, **kwds) + return klass( + obj=obj, + keys=by, + axis=axis, + level=level, + grouper=grouper, + exclusions=exclusions, + selection=selection, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + mutated=mutated, + ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 6d877bf666881..d980d5ba0be6e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -21,7 +21,7 @@ from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.groupby.generic import SeriesGroupBy -from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, groupby +from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, get_groupby from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.core.indexes.datetimes import DatetimeIndex, date_range @@ -334,7 +334,7 @@ def _gotitem(self, key, ndim, subset=None): grouper = self.grouper if subset is None: subset = self.obj - grouped = groupby(subset, by=None, grouper=grouper, axis=self.axis) + grouped = get_groupby(subset, by=None, grouper=grouper, axis=self.axis) # try the key selection try: @@ -353,7 +353,7 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): obj = self._selected_obj - grouped = groupby(obj, by=None, grouper=grouper, axis=self.axis) + grouped = get_groupby(obj, by=None, grouper=grouper, axis=self.axis) try: if isinstance(obj, ABCDataFrame) and callable(how): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e17181f55fdba..74a03e6bc53b8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -953,6 +953,13 @@ def test_no_mutate_but_looks_like(): tm.assert_series_equal(result1, result2) +def test_mutated_deprecated(): + # GH-29511 + df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=True): + df.groupby("key", mutated=True) + + def test_groupby_series_indexed_differently(): s1 = Series( [5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7], diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index b65551bcbcd5f..c278897e1d395 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import DataFrame, Series +from pandas.core.groupby.groupby import get_groupby import pandas.util.testing as tm @@ -19,12 +20,12 @@ def test_mutated(self): g = self.frame.groupby("A") assert not g.mutated - g = self.frame.groupby("A", mutated=True) + g = get_groupby(self.frame, by="A", mutated=True) assert g.mutated def test_getitem(self): g = self.frame.groupby("A") - g_mutated = self.frame.groupby("A", mutated=True) + g_mutated = get_groupby(self.frame, by="A", mutated=True) expected = g_mutated.B.apply(lambda x: x.rolling(2).mean()) @@ -45,7 +46,7 @@ def test_getitem_multiple(self): # GH 13174 g = self.frame.groupby("A") r = g.rolling(2) - g_mutated = self.frame.groupby("A", mutated=True) + g_mutated = get_groupby(self.frame, by="A", mutated=True) expected = g_mutated.B.apply(lambda x: x.rolling(2).count()) result = r.B.count() From d590eb91c91794eee7804a11b6c42d1c2e3da2af Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 12 Nov 2019 08:12:37 +0000 Subject: [PATCH 3/3] Removed mutated from public interface --- doc/source/whatsnew/v1.0.0.rst | 3 +-- pandas/core/generic.py | 11 ----------- pandas/core/groupby/groupby.py | 9 +++++++-- pandas/tests/groupby/test_groupby.py | 7 ------- 4 files changed, 8 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ed869936815e6..cd012fe755337 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -215,8 +215,7 @@ Deprecations value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) -- :meth:`DataFrame.groupby` and :meth:`Series.groupby` accepted a ``mutated`` parameter, which is actually - an implementation detail. It has been deprecated in the public interface (:issue:`29511`). + .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 539bb00eea542..bcfd624bd8354 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7830,7 +7830,6 @@ def groupby( group_keys=True, squeeze=False, observed=False, - mutated=False, ): """ Group DataFrame or Series using a mapper or by a Series of columns. @@ -7875,10 +7874,6 @@ def groupby( If False: show all values for categorical groupers. .. versionadded:: 0.23.0 - mutated : bool, default False - .. deprecated:: 1.0 - This is an internal parameter and will be removed from the public - interface in the future Returns ------- @@ -7947,11 +7942,6 @@ def groupby( raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) - if mutated is not False: - warnings.warn( - "Parameter 'mutated' is deprecated", FutureWarning, stacklevel=2 - ) - return get_groupby( self, by=by, @@ -7962,7 +7952,6 @@ def groupby( group_keys=group_keys, squeeze=squeeze, observed=observed, - mutated=mutated, ) def asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1bd2a1e57d223..dc4d363bffffa 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2501,11 +2501,16 @@ def get_groupby( observed: bool = False, mutated: bool = False, ): - from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy if isinstance(obj, Series): - klass = SeriesGroupBy + from pandas.core.groupby.generic import SeriesGroupBy + + klass = ( + SeriesGroupBy + ) # type: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]] elif isinstance(obj, DataFrame): + from pandas.core.groupby.generic import DataFrameGroupBy + klass = DataFrameGroupBy else: raise TypeError("invalid type: {obj}".format(obj=obj)) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 74a03e6bc53b8..e17181f55fdba 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -953,13 +953,6 @@ def test_no_mutate_but_looks_like(): tm.assert_series_equal(result1, result2) -def test_mutated_deprecated(): - # GH-29511 - df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=True): - df.groupby("key", mutated=True) - - def test_groupby_series_indexed_differently(): s1 = Series( [5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7],