Skip to content

TYP: Annotate groupby/ops.py #32921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2fd14c2
TYP: Annotate groupby/ops.py
dsaxton Mar 23, 2020
789039b
Blacken
dsaxton Mar 23, 2020
0e0cff3
Update pandas/core/groupby/ops.py
dsaxton Mar 23, 2020
1282d17
Use ellipsis
dsaxton Mar 23, 2020
bc87698
List -> List[Index]
dsaxton Mar 23, 2020
06a6c2d
Specify Callable types
dsaxton Mar 23, 2020
e9677f6
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 23, 2020
becfb17
More Callable subscripts
dsaxton Mar 23, 2020
14e8572
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 24, 2020
b9c7043
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 25, 2020
0ba3079
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 26, 2020
36fc7f0
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 26, 2020
a463503
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 28, 2020
2fa9ecb
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Mar 29, 2020
57daaba
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 2, 2020
ce69bfb
Update
dsaxton Apr 2, 2020
1b11b28
No ArrayLike
dsaxton Apr 2, 2020
e16ecc2
Import
dsaxton Apr 2, 2020
2abaa40
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 4, 2020
301b6cc
Update
dsaxton Apr 4, 2020
0aa272f
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 6, 2020
2c1dd6e
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 10, 2020
43b843d
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 10, 2020
bcd964d
Use F
dsaxton Apr 10, 2020
60b924d
Lint
dsaxton Apr 10, 2020
7e5694e
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 10, 2020
4621788
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 11, 2020
d7465c7
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 14, 2020
4f13b5f
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 17, 2020
6419d1a
Merge remote-tracking branch 'upstream/master' into type-ops
dsaxton Apr 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ class Grouping:
index : Index
grouper :
obj : Union[DataFrame, Series]
name :
name : Label
level :
observed : bool, default False
If we are a Categorical, use the observed values
Expand Down
48 changes: 34 additions & 14 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pandas._libs import NaT, iNaT, lib
import pandas._libs.groupby as libgroupby
import pandas._libs.reduction as libreduction
from pandas._typing import FrameOrSeries
from pandas._typing import F, FrameOrSeries, Label
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly

Expand Down Expand Up @@ -110,7 +110,7 @@ def groupings(self) -> List["grouper.Grouping"]:
return self._groupings

@property
def shape(self):
def shape(self) -> Tuple[int, ...]:
return tuple(ping.ngroups for ping in self.groupings)

def __iter__(self):
Expand Down Expand Up @@ -156,7 +156,7 @@ def _get_group_keys(self):
# provide "flattened" iterator for multi-group setting
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)

def apply(self, f, data: FrameOrSeries, axis: int = 0):
def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
mutated = self.mutated
splitter = self._get_splitter(data, axis=axis)
group_keys = self._get_group_keys()
Expand Down Expand Up @@ -237,7 +237,7 @@ def levels(self) -> List[Index]:
return [ping.group_index for ping in self.groupings]

@property
def names(self):
def names(self) -> List[Label]:
return [ping.name for ping in self.groupings]

def size(self) -> Series:
Expand Down Expand Up @@ -315,7 +315,7 @@ def result_index(self) -> Index:
)
return result

def get_group_levels(self):
def get_group_levels(self) -> List[Index]:
if not self.compressed and len(self.groupings) == 1:
return [self.groupings[0].result_index]

Expand Down Expand Up @@ -364,7 +364,9 @@ def _is_builtin_func(self, arg):
"""
return SelectionMixin._builtin_table.get(arg, arg)

def _get_cython_function(self, kind: str, how: str, values, is_numeric: bool):
def _get_cython_function(
self, kind: str, how: str, values: np.ndarray, is_numeric: bool
):

dtype_str = values.dtype.name
ftype = self._cython_functions[kind][how]
Expand Down Expand Up @@ -433,7 +435,7 @@ def _get_cython_func_and_vals(
return func, values

def _cython_operation(
self, kind: str, values, how: str, axis, min_count: int = -1, **kwargs
self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
) -> Tuple[np.ndarray, Optional[List[str]]]:
"""
Returns the values of a cython operation as a Tuple of [data, names].
Expand Down Expand Up @@ -617,7 +619,13 @@ def _transform(
return result

def agg_series(
self, obj: Series, func, *args, engine="cython", engine_kwargs=None, **kwargs
self,
obj: Series,
func: F,
*args,
engine: str = "cython",
engine_kwargs=None,
**kwargs,
):
# Caller is responsible for checking ngroups != 0
assert self.ngroups != 0
Expand Down Expand Up @@ -651,7 +659,7 @@ def agg_series(
raise
return self._aggregate_series_pure_python(obj, func)

def _aggregate_series_fast(self, obj: Series, func):
def _aggregate_series_fast(self, obj: Series, func: F):
# At this point we have already checked that
# - obj.index is not a MultiIndex
# - obj is backed by an ndarray, not ExtensionArray
Expand All @@ -671,7 +679,13 @@ def _aggregate_series_fast(self, obj: Series, func):
return result, counts

def _aggregate_series_pure_python(
self, obj: Series, func, *args, engine="cython", engine_kwargs=None, **kwargs
self,
obj: Series,
func: F,
*args,
engine: str = "cython",
engine_kwargs=None,
**kwargs,
):

if engine == "numba":
Expand Down Expand Up @@ -860,11 +874,11 @@ def result_index(self):
return self.binlabels

@property
def levels(self):
def levels(self) -> List[Index]:
return [self.binlabels]

@property
def names(self):
def names(self) -> List[Label]:
return [self.binlabels.name]

@property
Expand All @@ -875,7 +889,13 @@ def groupings(self) -> "List[grouper.Grouping]":
]

def agg_series(
self, obj: Series, func, *args, engine="cython", engine_kwargs=None, **kwargs
self,
obj: Series,
func: F,
*args,
engine: str = "cython",
engine_kwargs=None,
**kwargs,
):
# Caller is responsible for checking ngroups != 0
assert self.ngroups != 0
Expand Down Expand Up @@ -950,7 +970,7 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series:


class FrameSplitter(DataSplitter):
def fast_apply(self, f, sdata: FrameOrSeries, names):
def fast_apply(self, f: F, sdata: FrameOrSeries, names):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we say anything about names?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly List[Label] but I'm not sure

# must return keys::list, values::list, mutated::bool
starts, ends = lib.generate_slices(self.slabels, self.ngroups)
return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
Expand Down