From bc0019a9957295331cd8f1e9694aa8c049df7800 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 31 Dec 2020 16:12:01 -0500 Subject: [PATCH 1/3] REF: Move aggregation into apply --- pandas/core/apply.py | 71 ++++++++++++++++++++++++++++++++++++++++++-- pandas/core/frame.py | 25 ++++++++++------ 2 files changed, 84 insertions(+), 12 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c0c6c9084b560..9d5f489622700 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1,12 +1,18 @@ import abc import inspect -from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Type, cast import numpy as np from pandas._config import option_context -from pandas._typing import AggFuncType, Axis, FrameOrSeriesUnion +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + Axis, + FrameOrSeriesUnion, +) from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -17,6 +23,7 @@ ) from pandas.core.dtypes.generic import ABCSeries +from pandas.core.aggregation import agg_dict_like, agg_list_like from pandas.core.construction import create_series_with_explicit_dtype if TYPE_CHECKING: @@ -27,6 +34,7 @@ def frame_apply( obj: "DataFrame", + how: str, func: AggFuncType, axis: Axis = 0, raw: bool = False, @@ -35,6 +43,7 @@ def frame_apply( kwds=None, ): """ construct and return a row or column based frame apply object """ + assert how in ("apply", "agg") axis = obj._get_axis_number(axis) klass: Type[FrameApply] if axis == 0: @@ -44,6 +53,7 @@ def frame_apply( return klass( obj, + how, func, raw=raw, result_type=result_type, @@ -84,13 +94,16 @@ def wrap_results_for_axis( def __init__( self, obj: "DataFrame", + how: str, func, raw: bool, result_type: Optional[str], args, kwds, ): + assert how in ("apply", "agg") self.obj = obj + self.how = how self.raw = raw self.args = args or () self.kwds = kwds or {} @@ -104,7 +117,11 @@ def __init__( self.result_type = result_type # curry if needed - if (kwds or args) and not isinstance(func, (np.ufunc, str)): + if ( + (kwds or args) + and not isinstance(func, (np.ufunc, str)) + and not is_list_like(func) + ): def f(x): return func(x, *args, **kwds) @@ -139,6 +156,54 @@ def agg_axis(self) -> "Index": return self.obj._get_agg_axis(self.axis) def get_result(self): + if self.how == "apply": + return self.apply() + else: + return self.agg() + + def agg(self): + """ + Provide an implementation for the aggregators. + + Returns + ------- + tuple of result, how. + + Notes + ----- + how can be a string describe the required post-processing, or + None if not required. + """ + obj = self.obj + arg = self.f + args = self.args + kwargs = self.kwds + + _axis = kwargs.pop("_axis", None) + if _axis is None: + _axis = getattr(obj, "axis", 0) + + if isinstance(arg, str): + return obj._try_aggregate_string_function(arg, *args, **kwargs), None + elif is_dict_like(arg): + arg = cast(AggFuncTypeDict, arg) + return agg_dict_like(obj, arg, _axis), True + elif is_list_like(arg): + # we require a list, but not an 'str' + arg = cast(List[AggFuncTypeBase], arg) + return agg_list_like(obj, arg, _axis=_axis), None + else: + result = None + + if callable(arg): + f = obj._get_cython_func(arg) + if f and not args and not kwargs: + return getattr(obj, f)(), None + + # caller can react + return result, True + + def apply(self): """ compute the results """ # dispatch to agg if is_list_like(self.f) or is_dict_like(self.f): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc89823cd7817..6e9a8ab972abc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -121,12 +121,7 @@ from pandas.core import algorithms, common as com, generic, nanops, ops from pandas.core.accessor import CachedAccessor -from pandas.core.aggregation import ( - aggregate, - reconstruct_func, - relabel_result, - transform, -) +from pandas.core.aggregation import reconstruct_func, relabel_result, transform from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseFrameAccessor @@ -7623,13 +7618,24 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): return result def _aggregate(self, arg, axis: Axis = 0, *args, **kwargs): + from pandas.core.apply import frame_apply + + op = frame_apply( + self if axis == 0 else self.T, + how="agg", + func=arg, + axis=0, + args=args, + kwds=kwargs, + ) + result, how = op.get_result() + if axis == 1: # NDFrame.aggregate returns a tuple, and we need to transpose # only result - result, how = aggregate(self.T, arg, *args, **kwargs) result = result.T if result is not None else result - return result, how - return aggregate(self, arg, *args, **kwargs) + + return result, how agg = aggregate @@ -7789,6 +7795,7 @@ def apply( op = frame_apply( self, + how="apply", func=func, axis=axis, raw=raw, From dd99b04c4bb4f69bb62b98d8dc619548935768ad Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 1 Jan 2021 09:05:21 -0500 Subject: [PATCH 2/3] mypy --- pandas/core/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9d5f489622700..3a4a7883c93fd 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -129,7 +129,7 @@ def f(x): else: f = func - self.f = f + self.f: Any = f @property def res_columns(self) -> "Index": From 4261b4f695ff02ce78a1c01ea6cd2529cef2cd76 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 2 Jan 2021 11:28:18 -0500 Subject: [PATCH 3/3] removed assert, added type-hints --- pandas/core/apply.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3a4a7883c93fd..edb6b97a73e7f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -43,7 +43,6 @@ def frame_apply( kwds=None, ): """ construct and return a row or column based frame apply object """ - assert how in ("apply", "agg") axis = obj._get_axis_number(axis) klass: Type[FrameApply] if axis == 0: @@ -129,7 +128,7 @@ def f(x): else: f = func - self.f: Any = f + self.f: AggFuncType = f @property def res_columns(self) -> "Index": @@ -161,7 +160,7 @@ def get_result(self): else: return self.agg() - def agg(self): + def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: """ Provide an implementation for the aggregators. @@ -189,7 +188,7 @@ def agg(self): arg = cast(AggFuncTypeDict, arg) return agg_dict_like(obj, arg, _axis), True elif is_list_like(arg): - # we require a list, but not an 'str' + # we require a list, but not a 'str' arg = cast(List[AggFuncTypeBase], arg) return agg_list_like(obj, arg, _axis=_axis), None else: @@ -203,7 +202,7 @@ def agg(self): # caller can react return result, True - def apply(self): + def apply(self) -> FrameOrSeriesUnion: """ compute the results """ # dispatch to agg if is_list_like(self.f) or is_dict_like(self.f): @@ -256,6 +255,8 @@ def apply_empty_result(self): we will try to apply the function to an empty series in order to see if this is a reduction function """ + assert callable(self.f) + # we are not asked to reduce or infer reduction # so just return a copy of the existing object if self.result_type not in ["reduce", None]: @@ -311,6 +312,8 @@ def wrapper(*args, **kwargs): return self.obj._constructor_sliced(result, index=self.agg_axis) def apply_broadcast(self, target: "DataFrame") -> "DataFrame": + assert callable(self.f) + result_values = np.empty_like(target.values) # axis which we want to compare compliance @@ -344,6 +347,8 @@ def apply_standard(self): return self.wrap_results(results, res_index) def apply_series_generator(self) -> Tuple[ResType, "Index"]: + assert callable(self.f) + series_gen = self.series_generator res_index = self.result_index