merge with upstream

Dr-Irv · Dr-Irv · commit b5ba2eda6f6c · 2024-11-01T09:27:27.000-04:00
diff --git a/pandas-stubs/_libs/tslibs/dtypes.pyi b/pandas-stubs/_libs/tslibs/dtypes.pyi
@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import cast
 
 from .offsets import BaseOffset
 
@@ -29,16 +30,16 @@ class FreqGroup:
     def get_freq_group(code: int) -> int: ...
 
 class Resolution(Enum):
-    RESO_NS: int
-    RESO_US: int
-    RESO_MS: int
-    RESO_SEC: int
-    RESO_MIN: int
-    RESO_HR: int
-    RESO_DAY: int
-    RESO_MTH: int
-    RESO_QTR: int
-    RESO_YR: int
+    RESO_NS = cast(int, ...)
+    RESO_US = cast(int, ...)
+    RESO_MS = cast(int, ...)
+    RESO_SEC = cast(int, ...)
+    RESO_MIN = cast(int, ...)
+    RESO_HR = cast(int, ...)
+    RESO_DAY = cast(int, ...)
+    RESO_MTH = cast(int, ...)
+    RESO_QTR = cast(int, ...)
+    RESO_YR = cast(int, ...)
 
     def __lt__(self, other) -> bool: ...
     def __ge__(self, other) -> bool: ...
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -1054,29 +1054,53 @@ class DataFrame(NDFrame, OpsMixin):
         errors: IgnoreRaise = ...,
     ) -> None: ...
     @overload
+    def groupby(  # pyright: ignore[reportOverlappingOverload]
+        self,
+        by: Scalar,
+        axis: AxisIndex | NoDefault = ...,
+        level: IndexLabel | None = ...,
+        as_index: Literal[True] = True,
+        sort: _bool = ...,
+        group_keys: _bool = ...,
+        observed: _bool | NoDefault = ...,
+        dropna: _bool = ...,
+    ) -> DataFrameGroupBy[Scalar, Literal[True]]: ...
+    @overload
     def groupby(
         self,
         by: Scalar,
         axis: AxisIndex | NoDefault = ...,
         level: IndexLabel | None = ...,
-        as_index: _bool = ...,
+        as_index: Literal[False] = ...,
         sort: _bool = ...,
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Scalar]: ...
+    ) -> DataFrameGroupBy[Scalar, Literal[False]]: ...
+    @overload
+    def groupby(  # pyright: ignore[reportOverlappingOverload]
+        self,
+        by: DatetimeIndex,
+        axis: AxisIndex | NoDefault = ...,
+        level: IndexLabel | None = ...,
+        as_index: Literal[True] = True,
+        sort: _bool = ...,
+        group_keys: _bool = ...,
+        observed: _bool | NoDefault = ...,
+        dropna: _bool = ...,
+    ) -> DataFrameGroupBy[Timestamp, Literal[True]]: ...
     @overload
     def groupby(
         self,
         by: DatetimeIndex,
         axis: AxisIndex | NoDefault = ...,
         level: IndexLabel | None = ...,
-        as_index: _bool = ...,
+        as_index: Literal[False] = ...,
         sort: _bool = ...,
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Timestamp]: ...
+    ) -> DataFrameGroupBy[Timestamp, Literal[False]]: ...
     @overload
     def groupby(
         self,
@@ -1088,7 +1112,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Timedelta]: ...
+    ) -> DataFrameGroupBy[Timedelta, bool]: ...
     @overload
     def groupby(
         self,
@@ -1100,7 +1124,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Period]: ...
+    ) -> DataFrameGroupBy[Period, bool]: ...
     @overload
     def groupby(
         self,
@@ -1112,7 +1136,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[IntervalT]: ...
+    ) -> DataFrameGroupBy[IntervalT, bool]: ...
     @overload
     def groupby(
         self,
@@ -1124,7 +1148,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[tuple]: ...
+    ) -> DataFrameGroupBy[tuple, bool]: ...
     @overload
     def groupby(
         self,
@@ -1136,7 +1160,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[SeriesByT]: ...
+    ) -> DataFrameGroupBy[SeriesByT, bool]: ...
     @overload
     def groupby(
         self,
@@ -1148,7 +1172,7 @@ class DataFrame(NDFrame, OpsMixin):
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Any]: ...
+    ) -> DataFrameGroupBy[Any, bool]: ...
     def pivot(
         self,
         *,
diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi
@@ -11,6 +11,7 @@ from typing import (
     Generic,
     Literal,
     NamedTuple,
+    TypeVar,
     final,
     overload,
 )
@@ -29,6 +30,7 @@ from typing_extensions import (
 )
 
 from pandas._libs.lib import NoDefault
+from pandas._libs.tslibs.timestamps import Timestamp
 from pandas._typing import (
     S1,
     AggFuncTypeBase,
@@ -182,7 +184,9 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]):
         self,
     ) -> Iterator[tuple[ByT, Series[S1]]]: ...
 
-class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT]):
+_TT = TypeVar("_TT", bound=Literal[True, False])
+
+class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT, _TT]):
     # error: Overload 3 for "apply" will never be used because its parameters overlap overload 1
     @overload  # type: ignore[override]
     def apply(
@@ -234,7 +238,7 @@ class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT]):
     @overload
     def __getitem__(  # pyright: ignore[reportIncompatibleMethodOverride]
         self, key: Iterable[Hashable]
-    ) -> DataFrameGroupBy[ByT]: ...
+    ) -> DataFrameGroupBy[ByT, bool]: ...
     def nunique(self, dropna: bool = ...) -> DataFrame: ...
     def idxmax(
         self,
@@ -386,3 +390,11 @@ class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT]):
     def __iter__(  # pyright: ignore[reportIncompatibleMethodOverride]
         self,
     ) -> Iterator[tuple[ByT, DataFrame]]: ...
+    @overload
+    def size(self: DataFrameGroupBy[ByT, Literal[True]]) -> Series[int]: ...
+    @overload
+    def size(self: DataFrameGroupBy[ByT, Literal[False]]) -> DataFrame: ...
+    @overload
+    def size(self: DataFrameGroupBy[Timestamp, Literal[True]]) -> Series[int]: ...
+    @overload
+    def size(self: DataFrameGroupBy[Timestamp, Literal[False]]) -> DataFrame: ...
diff --git a/pandas-stubs/core/groupby/groupby.pyi b/pandas-stubs/core/groupby/groupby.pyi
@@ -225,11 +225,7 @@ class GroupBy(BaseGroupBy[NDFrameT]):
     def sem(
         self: GroupBy[DataFrame], ddof: int = ..., numeric_only: bool = ...
     ) -> DataFrame: ...
-    @final
-    @overload
     def size(self: GroupBy[Series]) -> Series[int]: ...
-    @overload  # return type depends on `as_index` for dataframe groupby
-    def size(self: GroupBy[DataFrame]) -> DataFrame | Series[int]: ...
     @final
     def sum(
         self,
diff --git a/pandas-stubs/core/interchange/dataframe_protocol.pyi b/pandas-stubs/core/interchange/dataframe_protocol.pyi
@@ -11,33 +11,34 @@ import enum
 from typing import (
     Any,
     TypedDict,
+    cast,
 )
 
 class DlpackDeviceType(enum.IntEnum):
-    CPU: int
-    CUDA: int
-    CPU_PINNED: int
-    OPENCL: int
-    VULKAN: int
-    METAL: int
-    VPI: int
-    ROCM: int
+    CPU = cast(int, ...)
+    CUDA = cast(int, ...)
+    CPU_PINNED = cast(int, ...)
+    OPENCL = cast(int, ...)
+    VULKAN = cast(int, ...)
+    METAL = cast(int, ...)
+    VPI = cast(int, ...)
+    ROCM = cast(int, ...)
 
 class DtypeKind(enum.IntEnum):
-    INT: int
-    UINT: int
-    FLOAT: int
-    BOOL: int
-    STRING: int
-    DATETIME: int
-    CATEGORICAL: int
+    INT = cast(int, ...)
+    UINT = cast(int, ...)
+    FLOAT = cast(int, ...)
+    BOOL = cast(int, ...)
+    STRING = cast(int, ...)
+    DATETIME = cast(int, ...)
+    CATEGORICAL = cast(int, ...)
 
 class ColumnNullType(enum.IntEnum):
-    NON_NULLABLE: int
-    USE_NAN: int
-    USE_SENTINEL: int
-    USE_BITMASK: int
-    USE_BYTEMASK: int
+    NON_NULLABLE = cast(int, ...)
+    USE_NAN = cast(int, ...)
+    USE_SENTINEL = cast(int, ...)
+    USE_BITMASK = cast(int, ...)
+    USE_BYTEMASK = cast(int, ...)
 
 class ColumnBuffers(TypedDict):
     data: tuple[Buffer, Any]
diff --git a/scripts/__init__.py b/scripts/__init__.py
@@ -1,9 +1,10 @@
 import sys
+from typing import Any
 
 from loguru import logger
 
 # Config the format of log message
-config = {
+config: dict[str, Any] = {
     "handlers": [
         {
             "sink": sys.stderr,
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -1025,6 +1025,40 @@ def test_types_pivot_table() -> None:
     )
 
 
+def test_types_groupby_as_index() -> None:
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    check(
+        assert_type(
+            df.groupby("a", as_index=False).size(),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            df.groupby("a", as_index=True).size(),
+            "pd.Series[int]",
+        ),
+        pd.Series,
+    )
+
+
+def test_types_groupby_size() -> None:
+    """Test for GH886: chaining groupby().size().to_frame().reset_index()."""
+    data = [
+        {"date": "2023-12-01", "val": 12},
+        {"date": "2023-12-02", "val": 2},
+        {"date": "2023-12-03", "val": 1},
+        {"date": "2023-12-03", "val": 10},
+    ]
+
+    df = pd.DataFrame(data)
+    groupby = df.groupby("date")
+    size = groupby.size()
+    frame = size.to_frame()
+    check(assert_type(frame.reset_index(), pd.DataFrame), pd.DataFrame)
+
+
 def test_types_groupby() -> None:
     df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]})
     df.index.name = "ind"