Skip to content

REF: Move part of groupby.agg to apply #39311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ repos:
files: ^pandas/tests/
- id: FrameOrSeriesUnion
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]
language: pygrep
types: [python]
exclude: ^pandas/_typing\.py$
Expand Down
42 changes: 40 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@

import abc
import inspect
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Type, cast
from typing import (
TYPE_CHECKING,
Any,
Dict,
Iterator,
List,
Optional,
Tuple,
Type,
Union,
cast,
)

import numpy as np

Expand All @@ -13,6 +24,7 @@
AggFuncType,
AggFuncTypeBase,
AggFuncTypeDict,
AggObjType,
Axis,
FrameOrSeriesUnion,
)
Expand All @@ -34,6 +46,7 @@

if TYPE_CHECKING:
from pandas import DataFrame, Index, Series
from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy

ResType = Dict[int, Any]

Expand Down Expand Up @@ -86,7 +99,7 @@ class Apply(metaclass=abc.ABCMeta):

def __init__(
self,
obj: FrameOrSeriesUnion,
obj: AggObjType,
func,
raw: bool,
result_type: Optional[str],
Expand Down Expand Up @@ -646,3 +659,28 @@ def apply_standard(self) -> FrameOrSeriesUnion:
return obj._constructor(mapped, index=obj.index).__finalize__(
obj, method="apply"
)


class GroupByApply(Apply):
    """
    Apply subclass whose ``obj`` is a SeriesGroupBy or DataFrameGroupBy.

    The constructor always forwards ``raw=False`` and ``result_type=None`` to
    the base class and stores the axis number resolved from the ``"axis"``
    entry of ``kwds`` (0 when that entry is absent) on ``self.axis``.
    """

    obj: Union[SeriesGroupBy, DataFrameGroupBy]

    def __init__(
        self,
        obj: Union[SeriesGroupBy, DataFrameGroupBy],
        func: AggFuncType,
        args,
        kwds,
    ):
        # Copy first, so the mapping handed to the base class is not the
        # caller's own object.
        kwds = kwds.copy()

        # Resolve the requested axis against the object wrapped by the
        # groupby (``obj.obj``); fall back to 0 when no axis was passed.
        requested_axis = kwds.get("axis", 0)
        self.axis = obj.obj._get_axis_number(requested_axis)

        super().__init__(obj, func, raw=False, result_type=None, args=args, kwds=kwds)

    def apply(self):
        """Always raise NotImplementedError."""
        raise NotImplementedError
5 changes: 3 additions & 2 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@
from pandas.core import algorithms, nanops
from pandas.core.aggregation import (
agg_list_like,
aggregate,
maybe_mangle_lambdas,
reconstruct_func,
validate_func_kwargs,
)
from pandas.core.apply import GroupByApply
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.base import DataError, SpecificationError
import pandas.core.common as com
Expand Down Expand Up @@ -952,7 +952,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
relabeling, func, columns, order = reconstruct_func(func, **kwargs)
func = maybe_mangle_lambdas(func)

result, how = aggregate(self, func, *args, **kwargs)
op = GroupByApply(self, func, args, kwargs)
result, how = op.agg()
if how is None:
return result

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ def test_aggregate_str_func(tsframe, groupbyfunc):
tm.assert_frame_equal(result, expected)


def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func):
gb = df.groupby(level=0)
if reduction_func in ("idxmax", "idxmin"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this new?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With regard to idxmax/idxmin, no. Those are the only two values of reduction_func that support axis=1 (all others do not). However, they fail here because df has both string and numeric columns.

error = TypeError
msg = "reduction operation '.*' not allowed for this dtype"
else:
error = ValueError
msg = f"Operation {reduction_func} does not support axis=1"
with pytest.raises(error, match=msg):
gb.agg(reduction_func, axis=1)


def test_aggregate_item_by_item(df):
grouped = df.groupby("A")

Expand Down