Skip to content

CLN: DataFrameGroupBy._cython_agg_general #30384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from textwrap import dedent
import typing
from typing import (
TYPE_CHECKING,
Any,
Callable,
FrozenSet,
Expand Down Expand Up @@ -68,6 +69,10 @@

from pandas.plotting import boxplot_frame_groupby

if TYPE_CHECKING:
from pandas.core.internals import Block


NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
# TODO(typing) the return value on this callable should be any *scalar*.
AggScalar = Union[str, Callable[..., Any]]
Expand Down Expand Up @@ -987,11 +992,11 @@ def _iterate_slices(self) -> Iterable[Series]:

def _cython_agg_general(
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
):
new_items, new_blocks = self._cython_agg_blocks(
) -> DataFrame:
agg_items, agg_blocks = self._cython_agg_blocks(
how, alt=alt, numeric_only=numeric_only, min_count=min_count
)
return self._wrap_agged_blocks(new_items, new_blocks)
return self._wrap_agged_blocks(agg_blocks, items=agg_items)

def _cython_agg_blocks(
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
Expand Down Expand Up @@ -1691,17 +1696,17 @@ def _wrap_transformed_output(

return result

def _wrap_agged_blocks(self, items, blocks):
def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
if not self.as_index:
index = np.arange(blocks[0].values.shape[-1])
mgr = BlockManager(blocks, [items, index])
mgr = BlockManager(blocks, axes=[items, index])
result = DataFrame(mgr)

self._insert_inaxis_grouper_inplace(result)
result = result._consolidate()
else:
index = self.grouper.result_index
mgr = BlockManager(blocks, [items, index])
mgr = BlockManager(blocks, axes=[items, index])
result = DataFrame(mgr)

if self.axis == 1:
Expand Down Expand Up @@ -1740,18 +1745,18 @@ def count(self):
ids, _, ngroups = self.grouper.group_info
mask = ids != -1

val = (
vals = (
(mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values())))
for blk in data.blocks
)
loc = (blk.mgr_locs for blk in data.blocks)
locs = (blk.mgr_locs for blk in data.blocks)

counted = [
lib.count_level_2d(x, labels=ids, max_bin=ngroups, axis=1) for x in val
]
blk = map(make_block, counted, loc)
counted = (
lib.count_level_2d(x, labels=ids, max_bin=ngroups, axis=1) for x in vals
)
blocks = [make_block(val, placement=loc) for val, loc in zip(counted, locs)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just some minor cleanups above: pluralizing names and using a list comprehension instead of map.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tend to prefer these too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this just be a generator expression, or does that fail? I see this is equivalent to the existing code, but it may add unnecessary overhead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would work, but it wouldn't make any difference, because it's passed into the BlockManager, where it's stored.


return self._wrap_agged_blocks(data.items, list(blk))
return self._wrap_agged_blocks(blocks, items=data.items)

def nunique(self, dropna: bool = True):
"""
Expand Down