REF: prune groupby paths #51187

Merged · 6 commits · Feb 6, 2023
40 changes: 20 additions & 20 deletions pandas/core/groupby/generic.py
@@ -380,10 +380,16 @@ def _wrap_applied_output(
         """
         if len(values) == 0:
             # GH #6265
+            if is_transform:
+                # GH#47787 see test_group_on_empty_multiindex
+                res_index = data.index
+            else:
+                res_index = self.grouper.result_index
+
             return self.obj._constructor(
                 [],
                 name=self.obj.name,
-                index=self.grouper.result_index,
+                index=res_index,
                 dtype=data.dtype,
             )
         assert values is not None
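
Illustration (not part of the diff, assuming a build with this change; cf. `test_group_on_empty_multiindex`): on empty input, a transform keeps the caller's index while a reduction gets the grouper's `result_index`:

```python
import pandas as pd

# Empty Series with a MultiIndex, grouped on level "a".
mi = pd.MultiIndex.from_arrays([[], []], names=["a", "b"])
ser = pd.Series([], index=mi, dtype="float64", name="x")

# Transform: schema (index) preserved -> the data.index branch above.
transformed = ser.groupby(level="a").transform(lambda x: x)
assert transformed.index.equals(ser.index)

# Reduction: indexed by the groups -> the grouper.result_index branch.
reduced = ser.groupby(level="a").sum()
assert reduced.index.name == "a"
```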
@@ -1136,14 +1142,12 @@ def cov(
     @property
     @doc(Series.is_monotonic_increasing.__doc__)
     def is_monotonic_increasing(self) -> Series:
-        result = self._op_via_apply("is_monotonic_increasing")
-        return result
+        return self.apply(lambda ser: ser.is_monotonic_increasing)

     @property
     @doc(Series.is_monotonic_decreasing.__doc__)
     def is_monotonic_decreasing(self) -> Series:
-        result = self._op_via_apply("is_monotonic_decreasing")
-        return result
+        return self.apply(lambda ser: ser.is_monotonic_decreasing)

     @doc(Series.hist.__doc__)
     def hist(
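
For intuition (my sketch, not from the PR): the property is now spelled directly as the `apply` it previously reached through `_op_via_apply`:

```python
import pandas as pd

ser = pd.Series([1, 2, 2, 1], index=["x", "x", "y", "y"])
gb = ser.groupby(level=0)

# Equivalent spellings after this change:
result = gb.is_monotonic_increasing
expected = gb.apply(lambda s: s.is_monotonic_increasing)
assert result.equals(expected)  # x: True, y: False
```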
@@ -1181,8 +1185,7 @@ def hist(
     @property
     @doc(Series.dtype.__doc__)
     def dtype(self) -> Series:
-        result = self._op_via_apply("dtype")
-        return result
+        return self.apply(lambda ser: ser.dtype)

     @doc(Series.unique.__doc__)
     def unique(self) -> Series:
@@ -1428,9 +1431,13 @@ def _wrap_applied_output(
     ):

         if len(values) == 0:
-            result = self.obj._constructor(
-                index=self.grouper.result_index, columns=data.columns
-            )
+            if is_transform:
+                # GH#47787 see test_group_on_empty_multiindex
+                res_index = data.index
+            else:
+                res_index = self.grouper.result_index
+
+            result = self.obj._constructor(index=res_index, columns=data.columns)
             result = result.astype(data.dtypes, copy=False)
             return result
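
Sketch of the DataFrame analogue (mine; the `astype(data.dtypes, copy=False)` call above is what keeps per-column dtypes on the empty path):

```python
import pandas as pd

df = pd.DataFrame({"key": pd.Series([], dtype="int64"),
                   "val": pd.Series([], dtype="float32")})

out = df.groupby("key").transform(lambda x: x)
assert out.index.equals(df.index)      # data.index, not result_index
assert out.dtypes["val"] == "float32"  # dtype preserved via astype
```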

@@ -1719,18 +1726,11 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
         # iterate through columns, see test_transform_exclude_nuisance
         # gets here with non-unique columns
         output = {}
-        inds = []
         for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
             output[i] = sgb.transform(wrapper)
-            inds.append(i)

         if not output:
             raise TypeError("Transform function invalid for data types")

-        columns = obj.columns.take(inds)
Member: Looks like we can also get rid of inds in this method

Member (author): good catch, will update

         result = self.obj._constructor(output, index=obj.index)
-        result.columns = columns
+        result.columns = obj.columns
         return result

     def filter(self, func, dropna: bool = True, *args, **kwargs):
@@ -2677,8 +2677,8 @@ def hist(
     @property
     @doc(DataFrame.dtypes.__doc__)
     def dtypes(self) -> Series:
-        result = self._op_via_apply("dtypes")
-        return result
+        # error: Incompatible return value type (got "DataFrame", expected "Series")
+        return self.apply(lambda df: df.dtypes)  # type: ignore[return-value]

     @doc(DataFrame.corrwith.__doc__)
     def corrwith(
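
The `type: ignore` is needed because the `apply` genuinely builds a DataFrame here, one row per group with a dtype per column, despite the `-> Series` annotation. A small demo (mine; the exact set of columns may depend on version):

```python
import pandas as pd

df = pd.DataFrame({"g": [1, 1, 2], "x": [1.0, 2.0, 3.0], "y": ["a", "b", "c"]})
res = df.groupby("g").dtypes

# One row per group; each cell holds that column's dtype.
assert res.loc[1, "x"] == "float64"
```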
70 changes: 19 additions & 51 deletions pandas/core/groupby/groupby.py
@@ -956,9 +956,6 @@ def __getattr__(self, attr: str):
     def _op_via_apply(self, name: str, *args, **kwargs):
         """Compute the result of an operation by using GroupBy's apply."""
         f = getattr(type(self._obj_with_exclusions), name)
-        if not callable(f):
-            return self.apply(lambda self: getattr(self, name))
-
         sig = inspect.signature(f)

         # a little trickery for aggregation functions that need an axis
@@ -980,9 +977,6 @@ def curried(x):
             return self.apply(curried)

         is_transform = name in base.transformation_kernels
-        # Transform needs to keep the same schema, including when empty
-        if is_transform and self._obj_with_exclusions.empty:
-            return self._obj_with_exclusions
         result = self._python_apply_general(
             curried,
             self._obj_with_exclusions,
@@ -1105,6 +1099,7 @@ def _set_result_index_ordered(

         return result

+    @final
     def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
         if isinstance(result, Series):
             result = result.to_frame()
@@ -1131,30 +1126,22 @@ def _indexed_output_to_ndframe(
     @final
     def _wrap_aggregated_output(
         self,
-        output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike],
+        result: Series | DataFrame,
         qs: npt.NDArray[np.float64] | None = None,
     ):
         """
         Wraps the output of GroupBy aggregations into the expected result.

         Parameters
         ----------
-        output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike]
-            Data to wrap.
+        result : Series, DataFrame

         Returns
         -------
         Series or DataFrame
         """
-
-        if isinstance(output, (Series, DataFrame)):
-            # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce,
-            # in which case our columns are already set correctly.
-            # ATM we do not get here for SeriesGroupBy; when we do, we will
-            # need to require that result.name already match self.obj.name
-            result = output
-        else:
-            result = self._indexed_output_to_ndframe(output)
+        # ATM we do not get here for SeriesGroupBy; when we do, we will
+        # need to require that result.name already match self.obj.name

         if not self.as_index:
             # `not self.as_index` is only relevant for DataFrameGroupBy,
@@ -1183,36 +1170,6 @@ def _wrap_aggregated_output(

         return self._reindex_output(result, qs=qs)

-    @final
-    def _wrap_transformed_output(
-        self, output: Mapping[base.OutputKey, ArrayLike]
-    ) -> Series | DataFrame:
-        """
-        Wraps the output of GroupBy transformations into the expected result.
-
-        Parameters
-        ----------
-        output : Mapping[base.OutputKey, ArrayLike]
-            Data to wrap.
-
-        Returns
-        -------
-        Series or DataFrame
-            Series for SeriesGroupBy, DataFrame for DataFrameGroupBy
-        """
-        if isinstance(output, (Series, DataFrame)):
-            result = output
-        else:
-            result = self._indexed_output_to_ndframe(output)
-
-        if self.axis == 1:
-            # Only relevant for DataFrameGroupBy
-            result = result.T
-            result.columns = self.obj.columns
-
-        result.index = self.obj.index
-        return result
-
     def _wrap_applied_output(
         self,
         data,
@@ -1456,7 +1413,8 @@ def _python_agg_general(self, func, *args, **kwargs):
         output: dict[base.OutputKey, ArrayLike] = {}

         if self.ngroups == 0:
-            # agg_series below assumes ngroups > 0
+            # e.g. test_evaluate_with_empty_groups different path gets different
+            # result dtype in empty case.
             return self._python_apply_general(f, self._selected_obj, is_agg=True)

         for idx, obj in enumerate(self._iterate_slices()):
@@ -1466,9 +1424,11 @@
             output[key] = result

         if not output:
+            # e.g. test_groupby_crash_on_nunique, test_margins_no_values_no_cols
             return self._python_apply_general(f, self._selected_obj)

-        return self._wrap_aggregated_output(output)
+        res = self._indexed_output_to_ndframe(output)
+        return self._wrap_aggregated_output(res)

     @final
     def _agg_general(
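
A sketch of the `ngroups == 0` escape hatch (my example; cf. `test_evaluate_with_empty_groups`): with no groups the per-group aggregation loop has nothing to do, so the apply path supplies the empty result:

```python
import pandas as pd

ser = pd.Series([], dtype="float64")
out = ser.groupby(ser).agg(lambda x: x.sum())
assert len(out) == 0
```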
@@ -1850,6 +1810,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
         # If we are grouping on categoricals we want unobserved categories to
         # return zero, rather than the default of NaN which the reindexing in
         # _wrap_agged_manager() returns. GH 35028
+        # e.g. test_dataframe_groupby_on_2_categoricals_when_observed_is_false
         with com.temp_setattr(self, "observed", True):
             result = self._wrap_agged_manager(new_mgr)

@@ -2577,6 +2538,7 @@ def ohlc(self) -> DataFrame:
             )
             return self._reindex_output(result)

+        # TODO: 2023-02-05 all tests that get here have self.as_index
         return self._apply_to_column_groupbys(
             lambda x: x.ohlc(), self._obj_with_exclusions
         )
@@ -2854,7 +2816,13 @@ def blk_func(values: ArrayLike) -> ArrayLike:
         if isinstance(new_obj, Series):
             new_obj.name = obj.name

-        return self._wrap_transformed_output(new_obj)
+        if self.axis == 1:
+            # Only relevant for DataFrameGroupBy
+            new_obj = new_obj.T
+            new_obj.columns = self.obj.columns
+
+        new_obj.index = self.obj.index
+        return new_obj

     @final
     @Substitution(name="groupby")
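
The wrapping that used to live in `_wrap_transformed_output` is now inlined above. For intuition (my example), a transform such as `ffill` is indexed like the input, not like the groups:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"g": [1, 1, 2], "v": [1.0, np.nan, np.nan]})
out = df.groupby("g").ffill()

assert out.index.equals(df.index)  # transform keeps the caller's index
# v becomes [1.0, 1.0, NaN]: group 2 has no prior value to fill from
```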
2 changes: 0 additions & 2 deletions pandas/core/groupby/ops.py
Expand Up @@ -1028,7 +1028,6 @@ def _aggregate_series_pure_python(
) -> npt.NDArray[np.object_]:
ids, _, ngroups = self.group_info

counts = np.zeros(ngroups, dtype=int)
result = np.empty(ngroups, dtype="O")
initialized = False

@@ -1044,7 +1043,6 @@
                 libreduction.check_result_array(res, group.dtype)
                 initialized = True

-            counts[i] = group.shape[0]
             result[i] = res

         return result
1 change: 1 addition & 0 deletions pandas/core/resample.py
@@ -435,6 +435,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
         try:
             if isinstance(obj, ABCDataFrame) and callable(how):
                 # Check if the function is reducing or not.
+                # e.g. test_resample_apply_with_additional_args
                 result = grouped._aggregate_item_by_item(how, *args, **kwargs)
             else:
                 result = grouped.aggregate(how, *args, **kwargs)
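
For context (my sketch; cf. `test_resample_apply_with_additional_args`): a callable `how` on a DataFrame resample first tries the item-by-item path, and extra args/kwargs are forwarded to the function:

```python
import pandas as pd

idx = pd.date_range("2023-01-01", periods=4, freq="H")
df = pd.DataFrame({"a": range(4)}, index=idx)

# Sum per 2-hour window, then scale by the forwarded kwarg.
out = df.resample("2H").apply(lambda x, mult: x.sum() * mult, mult=2)
```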