diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index 0f72b2b72141f..09f4fbec5176e 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -55,6 +55,7 @@ def group_any_all( mask: np.ndarray, # const uint8_t[::1] val_test: Literal["any", "all"], skipna: bool, + nullable: bool, ) -> None: ... def group_sum( out: np.ndarray, # complexfloatingintuint_t[:, ::1] diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 998a6f69a930a..da8c367d4fc2a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -2042,7 +2042,7 @@ def _compute_na_values(): np.uint16: uint16info.max, np.uint8: uint8info.max, np.bool_: uint8info.max, - np.object_: np.nan # oof + np.object_: np.nan, } return na_values diff --git a/pandas/_testing/compat.py b/pandas/_testing/compat.py index e2ac8f779ef0e..bb3bb99a4c6e4 100644 --- a/pandas/_testing/compat.py +++ b/pandas/_testing/compat.py @@ -1,11 +1,12 @@ """ Helpers for sharing tests between DataFrame/Series """ +from pandas._typing import DtypeObj from pandas import DataFrame -def get_dtype(obj): +def get_dtype(obj) -> DtypeObj: if isinstance(obj, DataFrame): # Note: we are assuming only one column return obj.dtypes.iat[0] diff --git a/pandas/conftest.py b/pandas/conftest.py index 888205366f9e6..4d0354a2aab04 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -17,6 +17,7 @@ - Dtypes - Misc """ +from __future__ import annotations from collections import abc from datetime import ( @@ -31,6 +32,7 @@ import os from typing import ( Callable, + Hashable, Iterator, ) @@ -1881,7 +1883,7 @@ def __init__(self, **kwargs) -> None: (pd.NA, pd.NA, pd.NA), ] ) -def names(request): +def names(request) -> tuple[Hashable, Hashable, Hashable]: """ A 3-tuple of names, the first two for operands, the last for a result. """ @@ -1937,7 +1939,7 @@ def indexer_ial(request): @pytest.fixture -def using_array_manager(): +def using_array_manager() -> bool: """ Fixture to check if the array manager is being used. """ @@ -1958,7 +1960,7 @@ def using_copy_on_write() -> bool: @pytest.fixture(params=warsaws) -def warsaw(request): +def warsaw(request) -> str: """ tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo. """ diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index bed2ed113606e..907804ab9e51d 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -4,7 +4,11 @@ import numpy as np -from pandas._typing import DtypeObj +from pandas._typing import ( + TYPE_CHECKING, + DtypeObj, + type_t, +) from pandas.compat import pa_version_under7p0 from pandas.util._decorators import cache_readonly @@ -16,6 +20,9 @@ if not pa_version_under7p0: import pyarrow as pa +if TYPE_CHECKING: + from pandas.core.arrays.arrow import ArrowExtensionArray + @register_extension_dtype class ArrowDtype(StorageExtensionDtype): @@ -113,7 +120,7 @@ def itemsize(self) -> int: return self.numpy_dtype.itemsize @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> type_t[ArrowExtensionArray]: """ Return the array type associated with this dtype. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cdb5dddf03a64..fd07b472fc3da 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -66,10 +66,7 @@ notna, ) -from pandas.core import ( - algorithms, - nanops, -) +from pandas.core import algorithms from pandas.core.apply import ( GroupByApply, maybe_mangle_lambdas, @@ -98,6 +95,7 @@ from pandas.plotting import boxplot_frame_groupby if TYPE_CHECKING: + from pandas import Categorical from pandas.core.generic import NDFrame # TODO(typing) the return value on this callable should be any *scalar*. @@ -138,29 +136,6 @@ class NamedAgg(NamedTuple): aggfunc: AggScalar -def generate_property(name: str, klass: type[DataFrame | Series]): - """ - Create a property for a GroupBy subclass to dispatch to DataFrame/Series. - - Parameters - ---------- - name : str - klass : {DataFrame, Series} - - Returns - ------- - property - """ - - def prop(self): - return self._make_wrapper(name) - - parent_method = getattr(klass, name) - prop.__doc__ = parent_method.__doc__ or "" - prop.__name__ = name - return property(prop) - - class SeriesGroupBy(GroupBy[Series]): def _wrap_agged_manager(self, mgr: Manager) -> Series: return self.obj._constructor(mgr, name=self.obj.name) @@ -718,18 +693,13 @@ def value_counts( else: # lab is a Categorical with categories an IntervalIndex - lab = cut(Series(val), bins, include_lowest=True) - # error: "ndarray" has no attribute "cat" - lev = lab.cat.categories # type: ignore[attr-defined] - # error: No overload variant of "take" of "_ArrayOrScalarCommon" matches - # argument types "Any", "bool", "Union[Any, float]" - lab = lev.take( # type: ignore[call-overload] - # error: "ndarray" has no attribute "cat" - lab.cat.codes, # type: ignore[attr-defined] + cat_ser = cut(Series(val), bins, include_lowest=True) + cat_obj = cast("Categorical", cat_ser._values) + lev = cat_obj.categories + lab = lev.take( + cat_obj.codes, allow_fill=True, - # error: Item "ndarray" of "Union[ndarray, Index]" has no attribute - # "_na_value" - fill_value=lev._na_value, # type: ignore[union-attr] + fill_value=lev._na_value, ) llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] @@ -1544,7 +1514,6 @@ def _cython_transform( **kwargs, ) -> DataFrame: assert axis == 0 # handled by caller - # TODO: no tests with self.ndim == 1 for DataFrameGroupBy # With self.axis == 0, we have multi-block tests # e.g. test_rank_min_int, test_cython_transform_frame @@ -2058,17 +2027,7 @@ def idxmax( axis = self.axis def func(df): - res = df._reduce( - nanops.nanargmax, - "argmax", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) func.__name__ = "idxmax" result = self._python_apply_general( @@ -2154,17 +2113,7 @@ def idxmin( axis = self.axis def func(df): - res = df._reduce( - nanops.nanargmin, - "argmin", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) func.__name__ = "idxmin" result = self._python_apply_general( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5f5bb1c8833da..a54c524094b23 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1380,9 +1380,7 @@ def _python_apply_general( this can be coincidental leading to value-dependent behavior. is_transform : bool, default False Indicator for whether the function is actually a transform - and should not have group keys prepended. This is used - in _make_wrapper which generates both transforms (e.g. diff) - and non-transforms (e.g. corr) + and should not have group keys prepended. is_agg : bool, default False Indicator for whether the function is an aggregation. When the result is empty, we don't want to warn for this case. @@ -4110,15 +4108,8 @@ def get_groupby( obj: NDFrame, by: _KeysArgType | None = None, axis: AxisInt = 0, - level=None, grouper: ops.BaseGrouper | None = None, - exclusions=None, - selection=None, - as_index: bool = True, - sort: bool = True, group_keys: bool | lib.NoDefault = True, - observed: bool = False, - dropna: bool = True, ) -> GroupBy: klass: type[GroupBy] @@ -4137,15 +4128,8 @@ def get_groupby( obj=obj, keys=by, axis=axis, - level=level, grouper=grouper, - exclusions=exclusions, - selection=selection, - as_index=as_index, - sort=sort, group_keys=group_keys, - observed=observed, - dropna=dropna, ) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index dc109f6b30d5c..62e12bd4a66e8 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -743,7 +743,7 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: Generator yielding subsetted objects """ ids, _, ngroups = self.group_info - return get_splitter(data, ids, ngroups, axis=axis) + return _get_splitter(data, ids, ngroups, axis=axis) @final @cache_readonly @@ -1017,13 +1017,12 @@ def agg_series( def _aggregate_series_pure_python( self, obj: Series, func: Callable ) -> npt.NDArray[np.object_]: - ids, _, ngroups = self.group_info + _, _, ngroups = self.group_info result = np.empty(ngroups, dtype="O") initialized = False - # equiv: splitter = self._get_splitter(obj, axis=0) - splitter = get_splitter(obj, ids, ngroups, axis=0) + splitter = self._get_splitter(obj, axis=0) for i, group in enumerate(splitter): res = func(group) @@ -1268,7 +1267,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: return df.__finalize__(sdata, method="groupby") -def get_splitter( +def _get_splitter( data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0 ) -> DataSplitter: if isinstance(data, Series): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fb980db320bfd..22fe227d21727 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -261,10 +261,6 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block: new_mgr_locs = self._mgr_locs[slicer] new_values = self._slice(slicer) - - if new_values.ndim != self.values.ndim: - raise ValueError("Only same dim slicing is allowed") - return type(self)(new_values, new_mgr_locs, self.ndim) @final diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 31d1be6c9ccbe..613f841a9a340 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -161,9 +161,7 @@ def __init__( self._timegrouper = timegrouper self.keys = None self.sort = True - # error: Incompatible types in assignment (expression has type "Union - # [int, Literal['index', 'columns', 'rows']]", variable has type "int") - self.axis = axis # type: ignore[assignment] + self.axis = obj._get_axis_number(axis) self.kind = kind self.group_keys = group_keys self.as_index = True diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 1313c39bb67b2..9a64c5eea33a8 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -708,6 +708,7 @@ def test_cython_transform_frame(op, args, targop): {"by": "string"}, ]: # {"by": 'string_missing'}]: # {"by": ['int','string']}]: + # TODO: remove or enable commented-out code gb = df.groupby(group_keys=False, **gb_target) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3cc0d59457528..f1e40691059e2 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -560,8 +560,6 @@ def _check_plot_works(f, default_axes=False, **kwargs): with tm.ensure_clean(return_filelike=True) as path: plt.savefig(path) - except Exception as err: - raise err finally: tm.close(fig)