Skip to content

CLN: Use group_info less #57598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ def value_counts(
from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut

ids, _ = self._grouper.group_info
ids = self._grouper.ids
val = self.obj._values

index_names = self._grouper.names + [self.obj.name]
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ def _concat_objects(

ax = self._selected_obj.index
if self.dropna:
labels = self._grouper.group_info[0]
labels = self._grouper.ids
mask = labels != -1
ax = ax[mask]

Expand Down Expand Up @@ -1423,7 +1423,7 @@ def _numba_agg_general(
)
# Pass group ids to kernel directly if it can handle it
# (This is faster since it doesn't require a sort)
ids, _ = self._grouper.group_info
ids = self._grouper.ids
ngroups = self._grouper.ngroups

res_mgr = df._mgr.apply(
Expand Down Expand Up @@ -4163,7 +4163,7 @@ def _nth(
if not dropna:
mask = self._make_mask_from_positional_indexer(n)

ids, _ = self._grouper.group_info
ids = self._grouper.ids

# Drop NA values in grouping
mask = mask & (ids != -1)
Expand Down Expand Up @@ -4503,7 +4503,7 @@ def ngroup(self, ascending: bool = True):
"""
obj = self._obj_with_exclusions
index = obj.index
comp_ids = self._grouper.group_info[0]
comp_ids = self._grouper.ids

dtype: type
if self._grouper.has_dropped_na:
Expand Down Expand Up @@ -5382,7 +5382,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT:
Series or DataFrame
Filtered _selected_obj.
"""
ids = self._grouper.group_info[0]
ids = self._grouper.ids
mask = mask & (ids != -1)
return self._selected_obj[mask]

Expand Down
15 changes: 6 additions & 9 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def groups(self) -> dict[Hashable, Index]:
@cache_readonly
def is_monotonic(self) -> bool:
# return if my group orderings are monotonic
return Index(self.group_info[0]).is_monotonic_increasing
return Index(self.ids).is_monotonic_increasing

@final
@cache_readonly
Expand All @@ -735,8 +735,7 @@ def group_info(self) -> tuple[npt.NDArray[np.intp], int]:
@cache_readonly
def codes_info(self) -> npt.NDArray[np.intp]:
# return the codes of items in original grouped axis
ids, _ = self.group_info
return ids
return self.ids

@final
@cache_readonly
Expand Down Expand Up @@ -933,9 +932,7 @@ def agg_series(
def _aggregate_series_pure_python(
self, obj: Series, func: Callable
) -> npt.NDArray[np.object_]:
_, ngroups = self.group_info

result = np.empty(ngroups, dtype="O")
result = np.empty(self.ngroups, dtype="O")
initialized = False

splitter = self._get_splitter(obj)
Expand Down Expand Up @@ -1073,7 +1070,7 @@ def nkeys(self) -> int:
@cache_readonly
def codes_info(self) -> npt.NDArray[np.intp]:
# return the codes of items in original grouped axis
ids, _ = self.group_info
ids = self.ids
if self.indexer is not None:
sorter = np.lexsort((ids, self.indexer))
ids = ids[sorter]
Expand Down Expand Up @@ -1133,7 +1130,7 @@ def result_index(self) -> Index:

@cache_readonly
def codes(self) -> list[npt.NDArray[np.intp]]:
return [self.group_info[0]]
return [self.ids]

@cache_readonly
def result_index_and_ids(self):
Expand All @@ -1150,7 +1147,7 @@ def names(self) -> list[Hashable]:
@property
def groupings(self) -> list[grouper.Grouping]:
lev = self.binlabels
codes = self.group_info[0]
codes = self.ids
labels = lev.take(codes)
ping = grouper.Grouping(
labels, labels, in_axis=False, level=None, uniques=lev._values
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,10 +776,10 @@ def test_groupby_empty(self):
# check group properties
assert len(gr._grouper.groupings) == 1
tm.assert_numpy_array_equal(
gr._grouper.group_info[0], np.array([], dtype=np.dtype(np.intp))
gr._grouper.ids, np.array([], dtype=np.dtype(np.intp))
)

assert gr._grouper.group_info[1] == 0
assert gr._grouper.ngroups == 0

# check name
gb = s.groupby(s)
Expand Down