From 2fd14c25f56c821d70c2321b8c1e28d1132fa69a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 22 Mar 2020 20:26:51 -0500 Subject: [PATCH 01/13] TYP: Annotate groupby/ops.py --- pandas/core/groupby/ops.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 577c874c9cbbe..fc6bf5dd1e342 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -7,7 +7,7 @@ """ import collections -from typing import List, Optional, Sequence, Tuple, Type +from typing import Callable, List, Optional, Sequence, Tuple, Type import numpy as np @@ -102,7 +102,7 @@ def groupings(self) -> List["grouper.Grouping"]: return self._groupings @property - def shape(self): + def shape(self) -> Tuple: return tuple(ping.ngroups for ping in self.groupings) def __iter__(self): @@ -148,7 +148,7 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply(self, f, data: FrameOrSeries, axis: int = 0): + def apply(self, f: Callable, data: FrameOrSeries, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() @@ -229,7 +229,7 @@ def levels(self) -> List[Index]: return [ping.group_index for ping in self.groupings] @property - def names(self): + def names(self) -> List[str]: return [ping.name for ping in self.groupings] def size(self) -> Series: @@ -307,7 +307,7 @@ def result_index(self) -> Index: ) return result - def get_group_levels(self): + def get_group_levels(self) -> List: if not self.compressed and len(self.groupings) == 1: return [self.groupings[0].result_index] @@ -356,7 +356,7 @@ def _is_builtin_func(self, arg): """ return SelectionMixin._builtin_table.get(arg, arg) - def _get_cython_function(self, kind: str, how: str, values, is_numeric: bool): + def _get_cython_function(self, kind: str, how: str, values: np.ndarray, is_numeric: bool): dtype_str = values.dtype.name ftype = self._cython_functions[kind][how] @@ -425,7 +425,7 @@ def _get_cython_func_and_vals( return func, values def _cython_operation( - self, kind: str, values, how: str, axis, min_count: int = -1, **kwargs + self, kind: str, values: np.ndarray, how: str, axis: int, min_count: int = -1, **kwargs ) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. @@ -580,13 +580,13 @@ def _cython_operation( return result, names def aggregate( - self, values, how: str, axis: int = 0, min_count: int = -1 + self, values: np.ndarray, how: str, axis: int = 0, min_count: int = -1 ) -> Tuple[np.ndarray, Optional[List[str]]]: return self._cython_operation( "aggregate", values, how, axis, min_count=min_count ) - def transform(self, values, how: str, axis: int = 0, **kwargs): + def transform(self, values: np.ndarray, how: str, axis: int = 0, **kwargs): return self._cython_operation("transform", values, how, axis, **kwargs) def _aggregate( @@ -617,7 +617,7 @@ def _transform( return result - def agg_series(self, obj: Series, func): + def agg_series(self, obj: Series, func: Callable): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 @@ -646,7 +646,7 @@ def agg_series(self, obj: Series, func): raise return self._aggregate_series_pure_python(obj, func) - def _aggregate_series_fast(self, obj: Series, func): + def _aggregate_series_fast(self, obj: Series, func: Callable): # At this point we have already checked that # - obj.index is not a MultiIndex # - obj is backed by an ndarray, not ExtensionArray @@ -665,7 +665,7 @@ def _aggregate_series_fast(self, obj: Series, func): result, counts = grouper.get_result() return result, counts - def _aggregate_series_pure_python(self, obj: Series, func): + def _aggregate_series_pure_python(self, obj: Series, func: Callable): group_index, _, ngroups = self.group_info @@ -837,11 +837,11 @@ def result_index(self): return self.binlabels @property - def levels(self): + def levels(self) -> List: return [self.binlabels] @property - def names(self): + def names(self) -> List: return [self.binlabels.name] @property @@ -851,7 +851,7 @@ def groupings(self) -> "List[grouper.Grouping]": for lvl, name in zip(self.levels, self.names) ] - def agg_series(self, obj: Series, func): + def agg_series(self, obj: Series, func: Callable): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result @@ -925,7 +925,7 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): - def fast_apply(self, f, sdata: FrameOrSeries, names): + def fast_apply(self, f: Callable, sdata: FrameOrSeries, names): # must return keys::list, values::list, mutated::bool starts, ends = lib.generate_slices(self.slabels, self.ngroups) return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) From 789039bcd921dde603262bd5824a677eaaa707e7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 22 Mar 2020 20:58:16 -0500 Subject: [PATCH 02/13] Blacken --- pandas/core/groupby/ops.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index fc6bf5dd1e342..6dc6490176586 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -356,7 +356,9 @@ def _is_builtin_func(self, arg): """ return SelectionMixin._builtin_table.get(arg, arg) - def _get_cython_function(self, kind: str, how: str, values: np.ndarray, is_numeric: bool): + def _get_cython_function( + self, kind: str, how: str, values: np.ndarray, is_numeric: bool + ): dtype_str = values.dtype.name ftype = self._cython_functions[kind][how] @@ -425,7 +427,13 @@ def _get_cython_func_and_vals( return func, values def _cython_operation( - self, kind: str, values: np.ndarray, how: str, axis: int, min_count: int = -1, **kwargs + self, + kind: str, + values: np.ndarray, + how: str, + axis: int, + min_count: int = -1, + **kwargs, ) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. From 0e0cff3c92f992a2271f70703689ff8f114a0c22 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sun, 22 Mar 2020 21:36:34 -0500 Subject: [PATCH 03/13] Update pandas/core/groupby/ops.py Co-Authored-By: William Ayd --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6dc6490176586..f179e9eaacc88 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -102,7 +102,7 @@ def groupings(self) -> List["grouper.Grouping"]: return self._groupings @property - def shape(self) -> Tuple: + def shape(self) -> Tuple[int, int]: return tuple(ping.ngroups for ping in self.groupings) def __iter__(self): From 1282d17f7a3cf98c188f66f0a83bbeb0fa9d9396 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 22 Mar 2020 22:56:36 -0500 Subject: [PATCH 04/13] Use ellipsis --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f179e9eaacc88..ced8087db9770 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -102,7 +102,7 @@ def groupings(self) -> List["grouper.Grouping"]: return self._groupings @property - def shape(self) -> Tuple[int, int]: + def shape(self) -> Tuple[int, ...]: return tuple(ping.ngroups for ping in self.groupings) def __iter__(self): From bc8769877c28050f45d21afdfc061cc644270533 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 22 Mar 2020 23:05:37 -0500 Subject: [PATCH 05/13] List -> List[Index] --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ced8087db9770..795bc5a7e2282 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -307,7 +307,7 @@ def result_index(self) -> Index: ) return result - def get_group_levels(self) -> List: + def get_group_levels(self) -> List[Index]: if not self.compressed and len(self.groupings) == 1: return [self.groupings[0].result_index] From 06a6c2de6df689292b6378082e6c35107948caa6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 23 Mar 2020 13:38:20 -0500 Subject: [PATCH 06/13] Specify Callable types --- pandas/core/groupby/ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 795bc5a7e2282..5c3e399d6177e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -7,7 +7,7 @@ """ import collections -from typing import Callable, List, Optional, Sequence, Tuple, Type +from typing import Any, Callable, List, Optional, Sequence, Tuple, Type import numpy as np @@ -148,7 +148,9 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply(self, f: Callable, data: FrameOrSeries, axis: int = 0): + def apply( + self, f: Callable[[FrameOrSeries], Any], data: FrameOrSeries, axis: int = 0 + ): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() From becfb17a80020c13147a18317aa3a3b08a26d87e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 23 Mar 2020 17:59:09 -0500 Subject: [PATCH 07/13] More Callable subscripts --- pandas/core/groupby/ops.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5c3e399d6177e..8b33484e00caf 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -627,7 +627,7 @@ def _transform( return result - def agg_series(self, obj: Series, func: Callable): + def agg_series(self, obj: Series, func: Callable[[Series], Any]): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 @@ -656,7 +656,7 @@ def agg_series(self, obj: Series, func: Callable): raise return self._aggregate_series_pure_python(obj, func) - def _aggregate_series_fast(self, obj: Series, func: Callable): + def _aggregate_series_fast(self, obj: Series, func: Callable[[Series], Any]): # At this point we have already checked that # - obj.index is not a MultiIndex # - obj is backed by an ndarray, not ExtensionArray @@ -675,7 +675,7 @@ def _aggregate_series_fast(self, obj: Series, func: Callable): result, counts = grouper.get_result() return result, counts - def _aggregate_series_pure_python(self, obj: Series, func: Callable): + def _aggregate_series_pure_python(self, obj: Series, func: Callable[[Series], Any]): group_index, _, ngroups = self.group_info @@ -861,7 +861,7 @@ def groupings(self) -> "List[grouper.Grouping]": for lvl, name in zip(self.levels, self.names) ] - def agg_series(self, obj: Series, func: Callable): + def agg_series(self, obj: Series, func: Callable[[Series], Any]): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result @@ -935,7 +935,9 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): - def fast_apply(self, f: Callable, sdata: FrameOrSeries, names): + def fast_apply( + self, f: Callable[[FrameOrSeries], Any], sdata: FrameOrSeries, names + ): # must return keys::list, values::list, mutated::bool starts, ends = lib.generate_slices(self.slabels, self.ngroups) return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) From ce69bfbc36066626faaf2f00df049a8d5318dcc3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 1 Apr 2020 20:48:18 -0500 Subject: [PATCH 08/13] Update --- pandas/core/groupby/ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 21fee01d1a96a..f1ce5d14073c3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -14,7 +14,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import FrameOrSeries +from pandas._typing import ArrayLike, FrameOrSeries, Label from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -231,7 +231,7 @@ def levels(self) -> List[Index]: return [ping.group_index for ping in self.groupings] @property - def names(self) -> List[str]: + def names(self) -> List[Label]: return [ping.name for ping in self.groupings] def size(self) -> Series: @@ -431,7 +431,7 @@ def _get_cython_func_and_vals( def _cython_operation( self, kind: str, - values: np.ndarray, + values: ArrayLike, how: str, axis: int, min_count: int = -1, From 1b11b283774221f0f3697b66a444105554a48eb4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 1 Apr 2020 21:25:41 -0500 Subject: [PATCH 09/13] No ArrayLike --- pandas/core/groupby/ops.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f1ce5d14073c3..5c29b9de9469a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -429,13 +429,7 @@ def _get_cython_func_and_vals( return func, values def _cython_operation( - self, - kind: str, - values: ArrayLike, - how: str, - axis: int, - min_count: int = -1, - **kwargs, + self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs, ) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. From e16ecc2bc9b1998c94f2b64c65d0b7a168193736 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 1 Apr 2020 21:55:39 -0500 Subject: [PATCH 10/13] Import --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5c29b9de9469a..6ae216e6cee47 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -14,7 +14,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import ArrayLike, FrameOrSeries, Label +from pandas._typing import FrameOrSeries, Label from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly From 301b6cc66734eb95edef73db20b58099255f564a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 3 Apr 2020 20:42:57 -0500 Subject: [PATCH 11/13] Update --- pandas/core/groupby/grouper.py | 2 +- pandas/core/groupby/ops.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 2f50845fda4dc..3b5124987f41d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -223,7 +223,7 @@ class Grouping: index : Index grouper : obj Union[DataFrame, Series]: - name : + name : Label level : observed : bool, default False If we are a Categorical, use the observed values diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6ae216e6cee47..3a8aa936e1bea 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -429,7 +429,7 @@ def _get_cython_func_and_vals( return func, values def _cython_operation( - self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs, + self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs ) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. @@ -582,13 +582,13 @@ def _cython_operation( return result, names def aggregate( - self, values: np.ndarray, how: str, axis: int = 0, min_count: int = -1 + self, values, how: str, axis: int = 0, min_count: int = -1 ) -> Tuple[np.ndarray, Optional[List[str]]]: return self._cython_operation( "aggregate", values, how, axis, min_count=min_count ) - def transform(self, values: np.ndarray, how: str, axis: int = 0, **kwargs): + def transform(self, values, how: str, axis: int = 0, **kwargs): return self._cython_operation("transform", values, how, axis, **kwargs) def _aggregate( @@ -832,11 +832,11 @@ def result_index(self): return self.binlabels @property - def levels(self) -> List: + def levels(self) -> List[Index]: return [self.binlabels] @property - def names(self) -> List: + def names(self) -> List[Label]: return [self.binlabels.name] @property From bcd964daff74f768a6e7b058af9c40e81ff84e20 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 10 Apr 2020 12:34:07 -0500 Subject: [PATCH 12/13] Use F --- pandas/core/groupby/ops.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8df14047106b2..f5408da19d6c0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -7,14 +7,14 @@ """ import collections -from typing import Any, Callable, List, Optional, Sequence, Tuple, Type +from typing import List, Optional, Sequence, Tuple, Type import numpy as np from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import FrameOrSeries, Label +from pandas._typing import F, FrameOrSeries, Label from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -148,9 +148,7 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply( - self, f: Callable[[FrameOrSeries], Any], data: FrameOrSeries, axis: int = 0 - ): + def apply(self, f: F, data: FrameOrSeries, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() @@ -612,7 +610,7 @@ def _transform( return result - def agg_series(self, obj: Series, func: Callable[[Series], Any]): + def agg_series(self, obj: Series, func: F): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 @@ -641,7 +639,7 @@ def agg_series(self, obj: Series, func: Callable[[Series], Any]): raise return self._aggregate_series_pure_python(obj, func) - def _aggregate_series_fast(self, obj: Series, func: Callable[[Series], Any]): + def _aggregate_series_fast(self, obj: Series, func: F): # At this point we have already checked that # - obj.index is not a MultiIndex # - obj is backed by an ndarray, not ExtensionArray @@ -660,7 +658,7 @@ def _aggregate_series_fast(self, obj: Series, func: Callable[[Series], Any]): result, counts = grouper.get_result() return result, counts - def _aggregate_series_pure_python(self, obj: Series, func: Callable[[Series], Any]): + def _aggregate_series_pure_python(self, obj: Series, func: F): group_index, _, ngroups = self.group_info @@ -846,7 +844,7 @@ def groupings(self) -> "List[grouper.Grouping]": for lvl, name in zip(self.levels, self.names) ] - def agg_series(self, obj: Series, func: Callable[[Series], Any]): + def agg_series(self, obj: Series, func: F): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result @@ -920,9 +918,7 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): - def fast_apply( - self, f: Callable[[FrameOrSeries], Any], sdata: FrameOrSeries, names - ): + def fast_apply(self, f: F, sdata: FrameOrSeries, names): # must return keys::list, values::list, mutated::bool starts, ends = lib.generate_slices(self.slabels, self.ngroups) return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) From 60b924d8f135615dbfce1d5dd20a746dd4920a78 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 10 Apr 2020 13:15:16 -0500 Subject: [PATCH 13/13] Lint --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 75c935cdf2e60..80573f32b936e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,7 @@ from pandas._libs import NaT, algos as libalgos, lib, writers import pandas._libs.internals as libinternals -from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike from pandas.util._validators import validate_bool_kwarg