diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7c850ffedfcab..690bd9bc9704b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -545,6 +545,7 @@ ExtensionArray - Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`284881`) - Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`) - Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`) +- Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` now accept only keyword arguments (:issue:`37541`) Other ^^^^^ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index b46e70f5a936d..028a0c4684aef 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -424,7 +424,7 @@ def _values_for_argsort(self) -> np.ndarray: data[self._mask] = -1 return data - def any(self, skipna: bool = True, **kwargs): + def any(self, *, skipna: bool = True, **kwargs): """ Return whether any element is True. @@ -492,7 +492,7 @@ def any(self, skipna: bool = True, **kwargs): else: return self.dtype.na_value - def all(self, skipna: bool = True, **kwargs): + def all(self, *, skipna: bool = True, **kwargs): """ Return whether all elements are True. 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 499bb364c48a1..53e5d95907bde 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1945,7 +1945,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: # Reductions @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") - def min(self, skipna=True, **kwargs): + def min(self, *, skipna=True, **kwargs): """ The minimum value of the object. @@ -1981,7 +1981,7 @@ def min(self, skipna=True, **kwargs): return self.categories[pointer] @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") - def max(self, skipna=True, **kwargs): + def max(self, *, skipna=True, **kwargs): """ The maximum value of the object. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1184464e967af..f8a609fb0cabe 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1245,7 +1245,7 @@ def __isub__(self, other): # -------------------------------------------------------------- # Reductions - def min(self, axis=None, skipna=True, *args, **kwargs): + def min(self, *, axis=None, skipna=True, **kwargs): """ Return the minimum value of the Array or minimum along an axis. @@ -1256,7 +1256,7 @@ def min(self, axis=None, skipna=True, *args, **kwargs): Index.min : Return the minimum value in an Index. Series.min : Return the minimum value in a Series. """ - nv.validate_min(args, kwargs) + nv.validate_min((), kwargs) nv.validate_minmax_axis(axis, self.ndim) if is_period_dtype(self.dtype): @@ -1276,7 +1276,7 @@ def min(self, axis=None, skipna=True, *args, **kwargs): return self._box_func(result) return self._from_backing_data(result) - def max(self, axis=None, skipna=True, *args, **kwargs): + def max(self, *, axis=None, skipna=True, **kwargs): """ Return the maximum value of the Array or maximum along an axis. 
@@ -1289,7 +1289,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs): """ # TODO: skipna is broken with max. # See https://github.com/pandas-dev/pandas/issues/24265 - nv.validate_max(args, kwargs) + nv.validate_max((), kwargs) nv.validate_minmax_axis(axis, self.ndim) if is_period_dtype(self.dtype): @@ -1309,7 +1309,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs): return self._box_func(result) return self._from_backing_data(result) - def mean(self, skipna=True, axis: Optional[int] = 0): + def mean(self, *, skipna=True, axis: Optional[int] = 0): """ Return the mean value of the Array. @@ -1350,8 +1350,8 @@ def mean(self, skipna=True, axis: Optional[int] = 0): return self._box_func(result) return self._from_backing_data(result) - def median(self, axis: Optional[int] = None, skipna: bool = True, *args, **kwargs): - nv.validate_median(args, kwargs) + def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): + nv.validate_median((), kwargs) if axis is not None and abs(axis) >= self.ndim: raise ValueError("abs(axis) must be less than ndim") diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 02f434342191f..4cfaae23e4389 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -439,19 +439,19 @@ def _cmp_method(self, other, op): return BooleanArray(result, mask) - def sum(self, skipna=True, min_count=0, **kwargs): + def sum(self, *, skipna=True, min_count=0, **kwargs): nv.validate_sum((), kwargs) return super()._reduce("sum", skipna=skipna, min_count=min_count) - def prod(self, skipna=True, min_count=0, **kwargs): + def prod(self, *, skipna=True, min_count=0, **kwargs): nv.validate_prod((), kwargs) return super()._reduce("prod", skipna=skipna, min_count=min_count) - def min(self, skipna=True, **kwargs): + def min(self, *, skipna=True, **kwargs): nv.validate_min((), kwargs) return super()._reduce("min", skipna=skipna) - def max(self, skipna=True, **kwargs): + def max(self, *, 
skipna=True, **kwargs): nv.validate_max((), kwargs) return super()._reduce("max", skipna=skipna) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e53276369a46f..e3d19e53e4517 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -603,19 +603,19 @@ def _arith_method(self, other, op): return self._maybe_mask_result(result, mask, other, op_name) - def sum(self, skipna=True, min_count=0, **kwargs): + def sum(self, *, skipna=True, min_count=0, **kwargs): nv.validate_sum((), kwargs) return super()._reduce("sum", skipna=skipna, min_count=min_count) - def prod(self, skipna=True, min_count=0, **kwargs): + def prod(self, *, skipna=True, min_count=0, **kwargs): nv.validate_prod((), kwargs) return super()._reduce("prod", skipna=skipna, min_count=min_count) - def min(self, skipna=True, **kwargs): + def min(self, *, skipna=True, **kwargs): nv.validate_min((), kwargs) return super()._reduce("min", skipna=skipna) - def max(self, skipna=True, **kwargs): + def max(self, *, skipna=True, **kwargs): nv.validate_max((), kwargs) return super()._reduce("max", skipna=skipna) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 810a2ce0cfde5..cd48f6cbc8170 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -271,77 +271,83 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: # ------------------------------------------------------------------------ # Reductions - def any(self, axis=None, out=None, keepdims=False, skipna=True): + def any(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_any((), dict(out=out, keepdims=keepdims)) return nanops.nanany(self._ndarray, axis=axis, skipna=skipna) - def all(self, axis=None, out=None, keepdims=False, skipna=True): + def all(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_all((), dict(out=out, keepdims=keepdims)) return nanops.nanall(self._ndarray, axis=axis, skipna=skipna) - def 
min(self, skipna: bool = True, **kwargs) -> Scalar: + def min(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) result = masked_reductions.min( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) return result - def max(self, skipna: bool = True, **kwargs) -> Scalar: + def max(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_max((), kwargs) result = masked_reductions.max( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) return result - def sum(self, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_sum((), kwargs) return nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) - def prod(self, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_prod((), kwargs) return nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) - def mean(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True): + def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims)) return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) def median( - self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True ): nv.validate_median( (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims) ) return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) - def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True): + def std( + self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True + ): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" ) return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, 
ddof=ddof) - def var(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True): + def var( + self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True + ): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var" ) return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) - def sem(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True): + def sem( + self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True + ): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem" ) return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) - def kurt(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True): + def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt" ) return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) - def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True): + def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew" ) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c948291f29aeb..0d9d257810674 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -370,6 +370,7 @@ def __iter__(self): def sum( self, + *, axis=None, dtype=None, out=None, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index fdf27797db3ab..8e917bb770247 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -97,7 +97,11 @@ def __call__(self, alt: F) -> F: @functools.wraps(alt) def f( - values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, **kwds + values: np.ndarray, + *, + axis: Optional[int] = None, + skipna: bool = True, + **kwds, ): if 
len(self.kwargs) > 0: for k, v in self.kwargs.items(): @@ -404,6 +408,7 @@ def _na_for_min_count( def nanany( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -441,6 +446,7 @@ def nanany( def nanall( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -479,6 +485,7 @@ def nanall( @disallow("M8") def nansum( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, min_count: int = 0, @@ -550,6 +557,7 @@ def _mask_datetimelike_result( @bottleneck_switch() def nanmean( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -618,7 +626,7 @@ def nanmean( @bottleneck_switch() -def nanmedian(values, axis=None, skipna=True, mask=None): +def nanmedian(values, *, axis=None, skipna=True, mask=None): """ Parameters ---------- @@ -766,7 +774,7 @@ def _get_counts_nanvar( @bottleneck_switch(ddof=1) -def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): +def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None): """ Compute the standard deviation along given axis while ignoring NaNs @@ -806,7 +814,7 @@ def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): @disallow("M8", "m8") @bottleneck_switch(ddof=1) -def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): +def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None): """ Compute the variance along given axis while ignoring NaNs @@ -876,6 +884,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): @disallow("M8", "m8") def nansem( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, ddof: int = 1, @@ -910,14 +919,14 @@ def nansem( """ # This checks if non-numeric-like data is passed with numeric_only=False # and raises a TypeError otherwise - nanvar(values, axis, skipna, ddof=ddof, mask=mask) + nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) mask = 
_maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) - var = nanvar(values, axis, skipna, ddof=ddof) + var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof) return np.sqrt(var) / np.sqrt(count) @@ -926,6 +935,7 @@ def _nanminmax(meth, fill_value_typ): @bottleneck_switch(name="nan" + meth) def reduction( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -965,6 +975,7 @@ def reduction( @disallow("O") def nanargmax( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -1009,6 +1020,7 @@ def nanargmax( @disallow("O") def nanargmin( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -1053,6 +1065,7 @@ def nanargmin( @disallow("M8", "m8") def nanskew( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -1137,6 +1150,7 @@ def nanskew( @disallow("M8", "m8") def nankurt( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -1230,6 +1244,7 @@ def nankurt( @disallow("M8", "m8") def nanprod( values: np.ndarray, + *, axis: Optional[int] = None, skipna: bool = True, min_count: int = 0, @@ -1409,7 +1424,7 @@ def _zero_out_fperr(arg): @disallow("M8", "m8") def nancorr( - a: np.ndarray, b: np.ndarray, method="pearson", min_periods: Optional[int] = None + a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: Optional[int] = None ): """ a, b: ndarrays @@ -1466,6 +1481,7 @@ def func(a, b): def nancov( a: np.ndarray, b: np.ndarray, + *, min_periods: Optional[int] = None, ddof: Optional[int] = 1, ): @@ -1581,6 +1597,7 @@ def _nanpercentile_1d( def nanpercentile( values: np.ndarray, q, + *, axis: int, na_value, mask: np.ndarray, @@ -1609,7 +1626,13 @@ 
def nanpercentile( if values.dtype.kind in ["m", "M"]: # need to cast to integer to avoid rounding errors in numpy result = nanpercentile( - values.view("i8"), q, axis, na_value.view("i8"), mask, ndim, interpolation + values.view("i8"), + q=q, + axis=axis, + na_value=na_value.view("i8"), + mask=mask, + ndim=ndim, + interpolation=interpolation, ) # Note: we have to do do `astype` and not view because in general we @@ -1638,7 +1661,7 @@ def nanpercentile( return np.percentile(values, q, axis=axis, interpolation=interpolation) -def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike: +def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: """ Cumulative function with skipna support.