diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index f1e370a0b8316..97de305077545 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -770,8 +770,9 @@ Styler Metadata ^^^^^^^^ +- Fixed metadata propagation in :meth:`DataFrame.max`, :meth:`DataFrame.min`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`Series.mode`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`DataFrame.any`, and :meth:`DataFrame.all` (:issue:`28283`) - Fixed metadata propagation in :meth:`DataFrame.squeeze`, and :meth:`DataFrame.describe` (:issue:`28283`) - Fixed metadata propagation in :meth:`DataFrame.std` (:issue:`28283`) Other ^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5207fd5db1c4d..f7fe51b1eee0a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11137,12 +11137,13 @@ def any( # type: ignore[override] bool_only: bool = False, skipna: bool = True, **kwargs, - ) -> Series: - # error: Incompatible return value type (got "Union[Series, bool]", - # expected "Series") - return self._logical_func( # type: ignore[return-value] + ) -> Series | bool: + result = self._logical_func( "any", nanops.nanany, axis, bool_only, skipna, **kwargs ) + if isinstance(result, Series): + result = result.__finalize__(self, method="any") + return result @doc(make_doc("all", ndim=2)) def all( @@ -11151,12 +11152,13 @@ def all( bool_only: bool = False, skipna: bool = True, **kwargs, - ) -> Series: - # error: Incompatible return value type (got "Union[Series, bool]", - # expected "Series") - return self._logical_func( # type: ignore[return-value] + ) -> Series | bool: + result = self._logical_func( "all", nanops.nanall, axis, bool_only, skipna, **kwargs ) + if isinstance(result, Series): + result = result.__finalize__(self, method="all") + return result @doc(make_doc("min", ndim=2)) def min( @@ -11166,7 +11168,10 @@ def min( numeric_only: bool = False, **kwargs, ): - return super().min(axis, skipna, 
numeric_only, **kwargs) + result = super().min(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="min") + return result @doc(make_doc("max", ndim=2)) def max( @@ -11176,7 +11181,10 @@ def max( numeric_only: bool = False, **kwargs, ): - return super().max(axis, skipna, numeric_only, **kwargs) + result = super().max(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="max") + return result @doc(make_doc("sum", ndim=2)) def sum( @@ -11199,7 +11207,8 @@ def prod( min_count: int = 0, **kwargs, ): - return super().prod(axis, skipna, numeric_only, min_count, **kwargs) + result = super().prod(axis, skipna, numeric_only, min_count, **kwargs) + return result.__finalize__(self, method="prod") @doc(make_doc("mean", ndim=2)) def mean( @@ -11209,7 +11218,10 @@ def mean( numeric_only: bool = False, **kwargs, ): - return super().mean(axis, skipna, numeric_only, **kwargs) + result = super().mean(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="mean") + return result @doc(make_doc("median", ndim=2)) def median( @@ -11219,7 +11231,10 @@ def median( numeric_only: bool = False, **kwargs, ): - return super().median(axis, skipna, numeric_only, **kwargs) + result = super().median(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="median") + return result @doc(make_doc("sem", ndim=2)) def sem( @@ -11230,7 +11245,10 @@ def sem( numeric_only: bool = False, **kwargs, ): - return super().sem(axis, skipna, ddof, numeric_only, **kwargs) + result = super().sem(axis, skipna, ddof, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="sem") + return result @doc(make_doc("var", ndim=2)) def var( @@ -11241,7 +11259,10 @@ def var( numeric_only: bool = False, **kwargs, ): - return super().var(axis, 
skipna, ddof, numeric_only, **kwargs) + result = super().var(axis, skipna, ddof, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="var") + return result @doc(make_doc("std", ndim=2)) def std( @@ -11252,8 +11273,10 @@ def std( numeric_only: bool = False, **kwargs, ): - result = cast(Series, super().std(axis, skipna, ddof, numeric_only, **kwargs)) - return result.__finalize__(self, method="std") + result = super().std(axis, skipna, ddof, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="std") + return result @doc(make_doc("skew", ndim=2)) def skew( @@ -11263,7 +11286,10 @@ def skew( numeric_only: bool = False, **kwargs, ): - return super().skew(axis, skipna, numeric_only, **kwargs) + result = super().skew(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="skew") + return result @doc(make_doc("kurt", ndim=2)) def kurt( @@ -11273,7 +11299,10 @@ def kurt( numeric_only: bool = False, **kwargs, ): - return super().kurt(axis, skipna, numeric_only, **kwargs) + result = super().kurt(axis, skipna, numeric_only, **kwargs) + if isinstance(result, Series): + result = result.__finalize__(self, method="kurt") + return result kurtosis = kurt product = prod diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 8c464c2229515..e30881e1a79c6 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -6,7 +6,10 @@ Iterable, ) import itertools -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) import numpy as np @@ -455,10 +458,12 @@ def from_dummies( f"Received 'data' of type: {type(data).__name__}" ) - if data.isna().any().any(): + col_isna_mask = cast(Series, data.isna().any()) + + if col_isna_mask.any(): raise ValueError( "Dummy DataFrame contains NA value in column: " - f"'{data.isna().any().idxmax()}'" + 
f"'{col_isna_mask.idxmax()}'" ) # index data with a list of all columns that are dummies diff --git a/pandas/core/series.py b/pandas/core/series.py index e6080144627ba..4677dc2274a52 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2202,7 +2202,7 @@ def mode(self, dropna: bool = True) -> Series: # Ensure index is type stable (should always use int index) return self._constructor( res_values, index=range(len(res_values)), name=self.name, copy=False - ) + ).__finalize__(self, method="mode") def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation """ diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f827eaf63a342..1522b83a4f5d0 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -180,10 +180,8 @@ (pd.DataFrame, frame_data, operator.methodcaller("idxmin")), (pd.DataFrame, frame_data, operator.methodcaller("idxmax")), (pd.DataFrame, frame_data, operator.methodcaller("mode")), - pytest.param( - (pd.Series, [0], operator.methodcaller("mode")), - marks=not_implemented_mark, - ), + (pd.Series, [0], operator.methodcaller("mode")), + (pd.DataFrame, frame_data, operator.methodcaller("median")), ( pd.DataFrame, frame_data, @@ -363,17 +361,24 @@ # Cumulative reductions (pd.Series, ([1],), operator.methodcaller("cumsum")), (pd.DataFrame, frame_data, operator.methodcaller("cumsum")), + (pd.Series, ([1],), operator.methodcaller("cummin")), + (pd.DataFrame, frame_data, operator.methodcaller("cummin")), + (pd.Series, ([1],), operator.methodcaller("cummax")), + (pd.DataFrame, frame_data, operator.methodcaller("cummax")), + (pd.Series, ([1],), operator.methodcaller("cumprod")), + (pd.DataFrame, frame_data, operator.methodcaller("cumprod")), # Reductions - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("any")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("any")), + (pd.DataFrame, frame_data, 
operator.methodcaller("all")), + (pd.DataFrame, frame_data, operator.methodcaller("min")), + (pd.DataFrame, frame_data, operator.methodcaller("max")), (pd.DataFrame, frame_data, operator.methodcaller("sum")), (pd.DataFrame, frame_data, operator.methodcaller("std")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("mean")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("mean")), + (pd.DataFrame, frame_data, operator.methodcaller("prod")), + (pd.DataFrame, frame_data, operator.methodcaller("sem")), + (pd.DataFrame, frame_data, operator.methodcaller("skew")), + (pd.DataFrame, frame_data, operator.methodcaller("kurt")), ]