From c4e37ab64374e00173b44823361bd3447559fb2c Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Wed, 5 Oct 2022 23:43:04 -0400 Subject: [PATCH 1/4] Allow covariance in the agg dict passed to DataFrame or Series groupby.agg() --- pandas-stubs/_typing.pyi | 4 ++-- tests/test_frame.py | 38 ++++++++++++++++++++++---------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index f297b2985..5488510d8 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -127,8 +127,8 @@ F = TypeVar("F", bound=FuncType) HashableT = TypeVar("HashableT", bound=Hashable) AggFuncTypeBase: TypeAlias = Union[Callable, str, np.ufunc] -AggFuncTypeDictSeries: TypeAlias = dict[Hashable, AggFuncTypeBase] -AggFuncTypeDictFrame: TypeAlias = dict[ +AggFuncTypeDictSeries: TypeAlias = Mapping[Hashable, AggFuncTypeBase] +AggFuncTypeDictFrame: TypeAlias = Mapping[ Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]] ] AggFuncTypeSeriesToFrame: TypeAlias = Union[ diff --git a/tests/test_frame.py b/tests/test_frame.py index 63ef1148a..2b3d2e3d3 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -33,7 +33,10 @@ from typing_extensions import assert_type import xarray as xr -from pandas._typing import Scalar +from pandas._typing import ( + AggFuncTypeBase, + Scalar, +) from tests import ( TYPE_CHECKING_INVALID_USAGE, @@ -643,7 +646,9 @@ def test_types_groupby_methods() -> None: def test_types_groupby_agg() -> None: - df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]}) + df = pd.DataFrame( + data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0], 0: [-1, -1, -1]} + ) check(assert_type(df.groupby("col1")["col3"].agg(min), pd.Series), pd.Series) check( assert_type(df.groupby("col1")["col3"].agg([min, max]), pd.DataFrame), @@ -655,21 +660,19 @@ def test_types_groupby_agg() -> None: assert_type(df.groupby("col1").agg(["min", "max"]), pd.DataFrame), pd.DataFrame ) check(assert_type(df.groupby("col1").agg([min, max]), pd.DataFrame), pd.DataFrame) + agg_dict1: dict[Hashable, str] = {"col2": "min", "col3": "max", 0: "sum"} + check(assert_type(df.groupby("col1").agg(agg_dict1), pd.DataFrame), pd.DataFrame) + agg_dict2: dict[Hashable, AggFuncTypeBase] = {"col2": min, "col3": max, 0: min} + check(assert_type(df.groupby("col1").agg(agg_dict2), pd.DataFrame), pd.DataFrame) + agg_dict3: dict[Hashable, str | AggFuncTypeBase] = { + "col2": min, + "col3": "max", + 0: lambda x: x.min(), + } + check(assert_type(df.groupby("col1").agg(agg_dict3), pd.DataFrame), pd.DataFrame) + named_agg = pd.NamedAgg(column="col2", aggfunc="max") check( - assert_type( - df.groupby("col1").agg({"col2": "min", "col3": "max"}), pd.DataFrame - ), - pd.DataFrame, - ) - check( - assert_type(df.groupby("col1").agg({"col2": min, "col3": max}), pd.DataFrame), - pd.DataFrame, - ) - check( - assert_type( - df.groupby("col1").agg(new_col=pd.NamedAgg(column="col2", aggfunc="max")), - pd.DataFrame, - ), + assert_type(df.groupby("col1").agg(new_col=named_agg), pd.DataFrame), pd.DataFrame, ) # GH#187 @@ -679,6 +682,9 @@ def test_types_groupby_agg() -> None: cols_opt: list[str | None] = ["col1", "col2"] check(assert_type(df.groupby(by=cols_opt).sum(), pd.DataFrame), pd.DataFrame) + cols_mixed: list[str | int] = ["col1", 0] + check(assert_type(df.groupby(by=cols_mixed).sum(), pd.DataFrame), pd.DataFrame) + # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_group_by_with_dropna_keyword() -> None: From 19655c23e27e74aba2a6a2ac234a0fdc9b168e89 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Thu, 6 Oct 2022 10:35:12 -0400 Subject: [PATCH 2/4] Add test cases for agg dicts with keys which are sub-types of Hashable and adjust type annotation to support this --- pandas-stubs/_typing.pyi | 4 ++-- tests/test_frame.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 5488510d8..68875f34d 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -127,9 +127,9 @@ F = TypeVar("F", bound=FuncType) HashableT = TypeVar("HashableT", bound=Hashable) AggFuncTypeBase: TypeAlias = Union[Callable, str, np.ufunc] -AggFuncTypeDictSeries: TypeAlias = Mapping[Hashable, AggFuncTypeBase] +AggFuncTypeDictSeries: TypeAlias = Mapping[HashableT, AggFuncTypeBase] AggFuncTypeDictFrame: TypeAlias = Mapping[ - Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]] + HashableT, Union[AggFuncTypeBase, list[AggFuncTypeBase]] ] AggFuncTypeSeriesToFrame: TypeAlias = Union[ list[AggFuncTypeBase], diff --git a/tests/test_frame.py b/tests/test_frame.py index 2b3d2e3d3..e7f895143 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -670,6 +670,10 @@ def test_types_groupby_agg() -> None: 0: lambda x: x.min(), } check(assert_type(df.groupby("col1").agg(agg_dict3), pd.DataFrame), pd.DataFrame) + agg_dict4 = {"col2": "sum"} + check(assert_type(df.groupby("col1").agg(agg_dict4), pd.DataFrame), pd.DataFrame) + agg_dict5 = {0: "sum"} + check(assert_type(df.groupby("col1").agg(agg_dict5), pd.DataFrame), pd.DataFrame) named_agg = pd.NamedAgg(column="col2", aggfunc="max") check( assert_type(df.groupby("col1").agg(new_col=named_agg), pd.DataFrame), From 13f3144a8b480673d4d8a2ba42210874e17fed09 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Fri, 7 Oct 2022 19:22:30 -0400 Subject: [PATCH 3/4] Remove/adjust agg dict annotations on test --- tests/test_frame.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index e7f895143..8c9ff44c5 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -33,10 +33,7 @@ from typing_extensions import assert_type import xarray as xr -from pandas._typing import ( - AggFuncTypeBase, - Scalar, -) +from pandas._typing import Scalar from tests import ( TYPE_CHECKING_INVALID_USAGE, @@ -660,11 +657,12 @@ def test_types_groupby_agg() -> None: assert_type(df.groupby("col1").agg(["min", "max"]), pd.DataFrame), pd.DataFrame ) check(assert_type(df.groupby("col1").agg([min, max]), pd.DataFrame), pd.DataFrame) - agg_dict1: dict[Hashable, str] = {"col2": "min", "col3": "max", 0: "sum"} + agg_dict1 = {"col2": "min", "col3": "max", 0: "sum"} check(assert_type(df.groupby("col1").agg(agg_dict1), pd.DataFrame), pd.DataFrame) - agg_dict2: dict[Hashable, AggFuncTypeBase] = {"col2": min, "col3": max, 0: min} + agg_dict2 = {"col2": min, "col3": max, 0: min} check(assert_type(df.groupby("col1").agg(agg_dict2), pd.DataFrame), pd.DataFrame) - agg_dict3: dict[Hashable, str | AggFuncTypeBase] = { + # Here, MyPy infers dict[object, object], so it must be explicitly annotated + agg_dict3: dict[str | int, str | Callable] = { "col2": min, "col3": "max", 0: lambda x: x.min(), From 32908f3687bac42cacee414488a9b659b114c6a3 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Tue, 11 Oct 2022 21:04:42 -0400 Subject: [PATCH 4/4] Change lambda to a local function --- tests/test_frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 8c9ff44c5..3f0a4ad52 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -661,11 +661,15 @@ def test_types_groupby_agg() -> None: check(assert_type(df.groupby("col1").agg(agg_dict1), pd.DataFrame), pd.DataFrame) agg_dict2 = {"col2": min, "col3": max, 0: min} check(assert_type(df.groupby("col1").agg(agg_dict2), pd.DataFrame), pd.DataFrame) + + def wrapped_min(x: Any) -> Any: + return x.min() + # Here, MyPy infers dict[object, object], so it must be explicitly annotated agg_dict3: dict[str | int, str | Callable] = { "col2": min, "col3": "max", - 0: lambda x: x.min(), + 0: wrapped_min, } check(assert_type(df.groupby("col1").agg(agg_dict3), pd.DataFrame), pd.DataFrame) agg_dict4 = {"col2": "sum"}