-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: GroupBy.apply() returns different results if a different GroupBy method is called first #35314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 33 commits
e84a15d
2f764db
e122809
27b2694
0cca6df
2786eb5
33cdf65
6a65e2f
9948d2f
e4a132e
4170ca6
0ab1c8a
f2a32f4
f5b674b
7028756
45abe63
063f0ea
7f0d192
974da63
b395e39
2405504
682c93a
67e9744
8f1b9c9
804b0e0
f422b7d
b07a290
6bec040
8cdd4cd
abe8be3
7112cf8
0c8b144
755c8f0
8951a73
b61695b
673a35b
42f53dd
18634a6
95553a1
1a0aa44
b09e41e
a7e264f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -734,13 +734,12 @@ def pipe(self, func, *args, **kwargs): | |
def _make_wrapper(self, name): | ||
assert name in self._apply_allowlist | ||
|
||
self._set_group_selection() | ||
|
||
# need to setup the selection | ||
# as are not passed directly but in the grouper | ||
f = getattr(self._obj_with_exclusions, name) | ||
if not isinstance(f, types.MethodType): | ||
return self.apply(lambda self: getattr(self, name)) | ||
with _group_selection_context(self): | ||
# need to setup the selection | ||
# as are not passed directly but in the grouper | ||
f = getattr(self._obj_with_exclusions, name) | ||
if not isinstance(f, types.MethodType): | ||
return self.apply(lambda self: getattr(self, name)) | ||
|
||
f = getattr(type(self._obj_with_exclusions), name) | ||
sig = inspect.signature(f) | ||
|
@@ -990,28 +989,30 @@ def _agg_general( | |
alias: str, | ||
npfunc: Callable, | ||
): | ||
self._set_group_selection() | ||
|
||
# try a cython aggregation if we can | ||
try: | ||
return self._cython_agg_general( | ||
how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count, | ||
) | ||
except DataError: | ||
pass | ||
except NotImplementedError as err: | ||
if "function is not implemented for this dtype" in str( | ||
err | ||
) or "category dtype not supported" in str(err): | ||
# raised in _get_cython_function, in some cases can | ||
# be trimmed by implementing cython funcs for more dtypes | ||
with _group_selection_context(self): | ||
# try a cython aggregation if we can | ||
try: | ||
return self._cython_agg_general( | ||
how=alias, | ||
alt=npfunc, | ||
numeric_only=numeric_only, | ||
min_count=min_count, | ||
) | ||
except DataError: | ||
pass | ||
else: | ||
raise | ||
except NotImplementedError as err: | ||
if "function is not implemented for this dtype" in str( | ||
err | ||
) or "category dtype not supported" in str(err): | ||
# raised in _get_cython_function, in some cases can | ||
# be trimmed by implementing cython funcs for more dtypes | ||
pass | ||
else: | ||
raise | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wow, codecov pointed out that this is not covered; can you see if you can get a test to land here (otherwise remove it)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I could not find a way to reach this line from any public function call. AFAICS anything that would raise a [inline code lost in extraction — presumably `NotImplementedError`; the remainder of this sentence is missing]. So I've gone with removing it. |
||
|
||
# apply a non-cython aggregation | ||
result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) | ||
return result | ||
# apply a non-cython aggregation | ||
result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) | ||
return result | ||
|
||
def _cython_agg_general( | ||
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 | ||
|
@@ -1928,29 +1929,31 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra | |
nth_values = list(set(n)) | ||
|
||
nth_array = np.array(nth_values, dtype=np.intp) | ||
self._set_group_selection() | ||
with _group_selection_context(self): | ||
|
||
mask_left = np.in1d(self._cumcount_array(), nth_array) | ||
mask_right = np.in1d(self._cumcount_array(ascending=False) + 1, -nth_array) | ||
mask = mask_left | mask_right | ||
mask_left = np.in1d(self._cumcount_array(), nth_array) | ||
mask_right = np.in1d( | ||
self._cumcount_array(ascending=False) + 1, -nth_array | ||
) | ||
mask = mask_left | mask_right | ||
|
||
ids, _, _ = self.grouper.group_info | ||
ids, _, _ = self.grouper.group_info | ||
|
||
# Drop NA values in grouping | ||
mask = mask & (ids != -1) | ||
# Drop NA values in grouping | ||
mask = mask & (ids != -1) | ||
|
||
out = self._selected_obj[mask] | ||
if not self.as_index: | ||
return out | ||
out = self._selected_obj[mask] | ||
if not self.as_index: | ||
return out | ||
|
||
result_index = self.grouper.result_index | ||
out.index = result_index[ids[mask]] | ||
result_index = self.grouper.result_index | ||
out.index = result_index[ids[mask]] | ||
|
||
if not self.observed and isinstance(result_index, CategoricalIndex): | ||
out = out.reindex(result_index) | ||
if not self.observed and isinstance(result_index, CategoricalIndex): | ||
out = out.reindex(result_index) | ||
|
||
out = self._reindex_output(out) | ||
return out.sort_index() if self.sort else out | ||
out = self._reindex_output(out) | ||
return out.sort_index() if self.sort else out | ||
|
||
# dropna is truthy | ||
if isinstance(n, valid_containers): | ||
|
Uh oh!
There was an error while loading. Please reload this page.