-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN: trim unreachable groupby paths #41361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -266,7 +266,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) | |
func = maybe_mangle_lambdas(func) | ||
ret = self._aggregate_multiple_funcs(func) | ||
if relabeling: | ||
ret.columns = columns | ||
# error: Incompatible types in assignment (expression has type | ||
# "Optional[List[str]]", variable has type "Index") | ||
ret.columns = columns # type: ignore[assignment] | ||
return ret | ||
|
||
else: | ||
cyfunc = com.get_cython_func(func) | ||
if cyfunc and not args and not kwargs: | ||
|
@@ -282,33 +286,21 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) | |
# see test_groupby.test_basic | ||
result = self._aggregate_named(func, *args, **kwargs) | ||
|
||
index = Index(sorted(result), name=self.grouper.names[0]) | ||
ret = create_series_with_explicit_dtype( | ||
result, index=index, dtype_if_empty=object | ||
) | ||
|
||
if not self.as_index: # pragma: no cover | ||
print("Warning, ignoring as_index=True") | ||
|
||
if isinstance(ret, dict): | ||
from pandas import concat | ||
|
||
ret = concat(ret.values(), axis=1, keys=[key.label for key in ret.keys()]) | ||
return ret | ||
index = Index(sorted(result), name=self.grouper.names[0]) | ||
return create_series_with_explicit_dtype( | ||
result, index=index, dtype_if_empty=object | ||
) | ||
|
||
agg = aggregate | ||
|
||
def _aggregate_multiple_funcs(self, arg): | ||
def _aggregate_multiple_funcs(self, arg) -> DataFrame: | ||
if isinstance(arg, dict): | ||
|
||
# show the deprecation, but only if we | ||
# have not shown a higher level one | ||
# GH 15931 | ||
if isinstance(self._selected_obj, Series): | ||
raise SpecificationError("nested renamer is not supported") | ||
raise SpecificationError("nested renamer is not supported") | ||
|
||
columns = list(arg.keys()) | ||
arg = arg.items() | ||
elif any(isinstance(x, (tuple, list)) for x in arg): | ||
arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] | ||
|
||
|
@@ -335,8 +327,14 @@ def _aggregate_multiple_funcs(self, arg): | |
results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func) | ||
|
||
if any(isinstance(x, DataFrame) for x in results.values()): | ||
# let higher level handle | ||
return results | ||
from pandas import concat | ||
|
||
res_df = concat( | ||
results.values(), axis=1, keys=[key.label for key in results.keys()] | ||
) | ||
# error: Incompatible return value type (got "Union[DataFrame, Series]", | ||
# expected "DataFrame") | ||
return res_df # type: ignore[return-value] | ||
|
||
indexed_output = {key.position: val for key, val in results.items()} | ||
output = self.obj._constructor_expanddim(indexed_output, index=None) | ||
|
@@ -1000,6 +998,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) | |
result = op.agg() | ||
if not is_dict_like(func) and result is not None: | ||
return result | ||
elif relabeling and result is not None: | ||
# this should be the only (non-raising) case with relabeling | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rhshadrach i wrote this comment a few years ago and now need help figuring it out. in particular, can re rule out There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes - it is not possible that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this condition can be replaced with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Also, as an aside, I think it was a misstep to try to reuse the code in |
||
# used reordered index of columns | ||
result = result.iloc[:, order] | ||
result.columns = columns | ||
|
||
if result is None: | ||
|
||
|
@@ -1039,12 +1042,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) | |
[sobj.columns.name] * result.columns.nlevels | ||
).droplevel(-1) | ||
|
||
if relabeling: | ||
|
||
# used reordered index of columns | ||
result = result.iloc[:, order] | ||
result.columns = columns | ||
|
||
if not self.as_index: | ||
self._insert_inaxis_grouper_inplace(result) | ||
result.index = np.arange(len(result)) | ||
|
@@ -1389,9 +1386,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: | |
if not output: | ||
raise TypeError("Transform function invalid for data types") | ||
|
||
columns = obj.columns | ||
if len(output) < len(obj.columns): | ||
columns = columns.take(inds) | ||
columns = obj.columns.take(inds) | ||
|
||
return self.obj._constructor(output, index=obj.index, columns=columns) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could outdent (and remove the else) here to make it more obvious
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
considered that, but this mirrors the layout of the DataFrameGroupBy method and im holding out hope of sharing more of that