Skip to content

Commit 8460340

Browse files
authored
REF: Move agg helpers into apply (#39498)
1 parent bd731d4 commit 8460340

File tree

3 files changed

+217
-224
lines changed

3 files changed

+217
-224
lines changed

pandas/core/aggregation.py

Lines changed: 2 additions & 216 deletions
Original file line number | Diff line number | Diff line change
@@ -27,18 +27,16 @@
2727
AggFuncType,
2828
AggFuncTypeBase,
2929
AggFuncTypeDict,
30-
AggObjType,
3130
Axis,
3231
FrameOrSeries,
3332
FrameOrSeriesUnion,
3433
)
3534

36-
from pandas.core.dtypes.cast import is_nested_object
3735
from pandas.core.dtypes.common import is_dict_like, is_list_like
38-
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
36+
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
3937

4038
from pandas.core.algorithms import safe_sort
41-
from pandas.core.base import DataError, SpecificationError
39+
from pandas.core.base import SpecificationError
4240
import pandas.core.common as com
4341
from pandas.core.indexes.api import Index
4442

@@ -532,215 +530,3 @@ def transform_str_or_callable(
532530
return obj.apply(func, args=args, **kwargs)
533531
except Exception:
534532
return func(obj, *args, **kwargs)
535-
536-
537-
def agg_list_like(
    obj: AggObjType,
    arg: List[AggFuncTypeBase],
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Aggregate ``obj`` with each function in a list-like of aggregations.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : list
        Aggregations to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on. Only 0 is accepted.

    Returns
    -------
    Result of aggregation: the per-function (1-dim selection) or per-column
    (2-dim selection) results concatenated along axis 1, or a Series of the
    raw results when they cannot be concatenated.

    Raises
    ------
    NotImplementedError
        If ``_axis`` is not 0.
    ValueError
        If no aggregation produced a result ("no results"), or if the
        fallback Series of results holds nested objects.
    """
    if _axis != 0:
        raise NotImplementedError("axis other than 0 is not supported")

    from pandas.core.reshape.concat import concat

    # A 1-dim selection is used as-is; otherwise work on the object
    # with its exclusions applied.
    selected_obj = obj._selected_obj
    if selected_obj.ndim != 1:
        selected_obj = obj._obj_with_exclusions

    pieces = []
    labels = []

    if selected_obj.ndim == 1:
        # Degenerate case: one column, apply every function to it in turn.
        for func in arg:
            column = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
            try:
                outcome = column.aggregate(func)
            except TypeError:
                # This function cannot be applied here; leave it out.
                continue
            pieces.append(outcome)
            # Prefer the callable's name as the label, else the spec itself.
            labels.append(com.get_callable_name(func) or func)
    else:
        # Several columns: apply the full list of functions to each column.
        for position, label in enumerate(selected_obj):
            column = obj._gotitem(
                label, ndim=1, subset=selected_obj.iloc[:, position]
            )
            try:
                outcome = column.aggregate(arg)
            except (TypeError, DataError):
                continue
            except ValueError as err:
                message = str(err)
                # "Must produce aggregated value" is raised directly in
                # _aggregate_named, "no results" in _aggregate_multiple_funcs;
                # both mean this column cannot be aggregated, so drop it.
                if (
                    "Must produce aggregated value" in message
                    or "no results" in message
                ):
                    continue
                raise
            pieces.append(outcome)
            labels.append(label)

    if len(pieces) == 0:
        raise ValueError("no results")

    try:
        return concat(pieces, keys=labels, axis=1, sort=False)
    except TypeError as err:
        # The pieces are not NDFrames (e.g. a list of scalars), so they
        # are collected into a Series instead.
        from pandas import Series

        fallback = Series(pieces, index=labels, name=obj.name)
        if is_nested_object(fallback):
            raise ValueError(
                "cannot combine transform and aggregation operations"
            ) from err
        return fallback
628-
629-
def agg_dict_like(
    obj: AggObjType,
    arg: AggFuncTypeDict,
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Aggregate ``obj`` according to a dict-like of label-aggregation pairs.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on. Only 0 is accepted.

    Returns
    -------
    Result of aggregation: a concatenation of the per-key results when all
    of them are NDFrames, otherwise a Series built from the per-key results.

    Raises
    ------
    ValueError
        If ``_axis`` is not 0, or if the per-key results are a mix of
        NDFrames and non-NDFrames.
    SpecificationError
        For a nested renamer, or for keys missing from the columns when no
        value is a list, tuple or dict.
    KeyError
        If a key is missing from the columns when some value is a list,
        tuple or dict.
    """

    def _is_nested_spec(value) -> bool:
        # True for specs that bundle several aggregations together.
        return isinstance(value, (list, tuple, dict))

    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj

    # arg may be a Series, so go through .items()/.keys() rather than
    # iterating it directly or using .values().
    if any(_is_nested_spec(how) for _, how in arg.items()):
        # At least one value is non-scalar, e.g. {'A': ['mean']}:
        # normalize every value to a list-like.
        normalized: AggFuncTypeDict = {}
        for label, how in arg.items():
            normalized[label] = how if _is_nested_spec(how) else [how]

            # Keys must be columns for ndim=2. Dict values such as
            # {'ra': {'A': 'mean'}}, and any non-scalar spec on a Series,
            # are rejected as nested renamers.
            if isinstance(how, dict) or isinstance(selected_obj, ABCSeries):
                raise SpecificationError("nested renamer is not supported")
            if (
                isinstance(selected_obj, ABCDataFrame)
                and label not in selected_obj.columns
            ):
                raise KeyError(f"Column '{label}' does not exist!")

        arg = normalized
    else:
        # deprecation of renaming keys
        # GH 15931
        requested = list(arg.keys())
        if isinstance(selected_obj, ABCDataFrame):
            present = selected_obj.columns.intersection(requested)
            if len(present) != len(requested):
                missing = list(safe_sort(list(set(requested) - set(present))))
                raise SpecificationError(f"Column(s) {missing} do not exist")

    from pandas.core.reshape.concat import concat

    if selected_obj.ndim == 1:
        # The key only labels the output; every spec runs on the same column.
        column = obj._gotitem(obj._selection, ndim=1)
        results = {label: column.agg(how) for label, how in arg.items()}
    else:
        # The key both selects the column and labels the output.
        results = {
            label: obj._gotitem(label, ndim=1).agg(how)
            for label, how in arg.items()
        }

    final_keys = list(arg.keys())

    # Compute the isinstance flags once and reuse them for all() and any().
    ndframe_flags = [isinstance(res, ABCNDFrame) for res in results.values()]

    if all(ndframe_flags):
        # Drop empty results unless every result is empty.
        non_empty = [label for label in final_keys if not results[label].empty]
        chosen = non_empty or final_keys
        if isinstance(obj, ABCSeries):
            concat_axis = 0
        else:
            concat_axis = 1
        return concat({label: results[label] for label in chosen}, axis=concat_axis)

    if any(ndframe_flags):
        # There is a mix of NDFrames and scalars
        raise ValueError(
            "cannot perform both aggregation "
            "and transformation operations "
            "simultaneously"
        )

    from pandas import Series

    # we have a dict of scalars
    # GH 36212 use name only if obj is a series
    series_name = None
    if obj.ndim == 1:
        series_name = cast("Series", obj).name

    return Series(results, name=series_name)

0 commit comments

Comments
 (0)