Skip to content

REGR: Revert GH51335 #52250

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ Other API changes
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
- The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
- :class:`DataFrame` and :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results; explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
- :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results; explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
- Division by zero with :class:`ArrowDtype` dtypes returns ``-inf``, ``nan``, or ``inf`` depending on the numerator, instead of raising (:issue:`51541`)
- Added :func:`pandas.api.types.is_any_real_numeric_dtype` to check for real numeric dtypes (:issue:`51152`)
- :meth:`~arrays.ArrowExtensionArray.value_counts` now returns data with :class:`ArrowDtype` with ``pyarrow.int64`` type instead of ``"Int64"`` type (:issue:`51462`)
Expand Down Expand Up @@ -1204,11 +1204,11 @@ Numeric
^^^^^^^
- Bug in :meth:`DataFrame.add` where a ufunc could not be applied when the inputs contained a mix of DataFrame and Series types (:issue:`39853`)
- Bug in arithmetic operations on :class:`Series` not propagating mask when combining masked dtypes and numpy dtypes (:issue:`45810`, :issue:`42630`)
- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` where the result would not be coerced to float (:issue:`49551`)
- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
- Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`)
- Bug in :meth:`~arrays.ArrowExtensionArray.mode` where ``dropna=False`` was not respected when there were ``NA`` values (:issue:`50982`)
- Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` where column names ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`)
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with tz-aware data containing ``pd.NaT`` and ``axis=1`` would return incorrect results (:issue:`51242`)

Conversion
^^^^^^^^^^
Expand Down
83 changes: 47 additions & 36 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
is_integer_dtype,
is_iterator,
is_list_like,
is_object_dtype,
is_scalar,
is_sequence,
needs_i8_conversion,
Expand Down Expand Up @@ -10925,44 +10926,54 @@ def _get_data() -> DataFrame:
data = self._get_bool_data()
return data

# Case with EAs see GH#35881
df = self
if numeric_only:
df = _get_data()
if numeric_only or axis == 0:
# For numeric_only non-None and axis non-None, we know
# which blocks to use and no try/except is needed.
# For numeric_only=None only the case with axis==0 and no object
# dtypes are unambiguous can be handled with BlockManager.reduce
# Case with EAs see GH#35881
df = self
if numeric_only:
df = _get_data()
if axis == 1:
df = df.T
axis = 0

# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res = df._mgr.reduce(blk_func)
out = df._constructor(res).iloc[0]
if out_dtype is not None:
out = out.astype(out_dtype)
if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
# Even if we are object dtype, follow numpy and return
# float64, see test_apply_funcs_over_empty
out = out.astype(np.float64)

return out

assert not numeric_only and axis in (1, None)

data = self
values = data.values
result = func(values)

if hasattr(result, "dtype"):
if filter_type == "bool" and notna(result).all():
result = result.astype(np.bool_)
elif filter_type is None and is_object_dtype(result.dtype):
try:
result = result.astype(np.float64)
except (ValueError, TypeError):
# try to coerce to the original dtypes item by item if we can
pass

if axis is None:
return func(df.values)
elif axis == 1:
if len(df.index) == 0:
# Taking a transpose would result in no columns, losing the dtype.
# In the empty case, reducing along axis 0 or 1 gives the same
# result dtype, so reduce with axis=0 and ignore values
result = df._reduce(
op,
name,
axis=0,
skipna=skipna,
numeric_only=False,
filter_type=filter_type,
**kwds,
).iloc[:0]
result.index = df.index
return result
df = df.T

# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res = df._mgr.reduce(blk_func)
out = df._constructor(res).iloc[0]
if out_dtype is not None:
out = out.astype(out_dtype)
elif (df._mgr.get_dtypes() == object).any():
out = out.astype(object)
elif len(self) == 0 and name in ("sum", "prod"):
# Even if we are object dtype, follow numpy and return
# float64, see test_apply_funcs_over_empty
out = out.astype(np.float64)
return result

return out
labels = self._get_agg_axis(axis)
result = self._constructor_sliced(result, index=labels)
return result

def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
"""
Expand Down
21 changes: 17 additions & 4 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,16 @@ def wrapper(x):
DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
],
)
def test_stat_operators_attempt_obj_array(self, method, df, axis):
def test_stat_operators_attempt_obj_array(
self, method, df, axis, request, using_array_manager
):
# GH#676
if (
axis in (1, "columns")
or method not in ("sum", "prod", "min", "max")
or using_array_manager
):
request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
assert df.values.dtype == np.object_
result = getattr(df, method)(axis=axis)
expected = getattr(df.astype("f8"), method)(axis=axis).astype(object)
Expand Down Expand Up @@ -402,6 +410,7 @@ def test_mean_includes_datetimes(self, tz):
expected = Series([Timestamp("2000", tz=tz)], index=["A"])
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(reason="Revert of GH#51335")
def test_mean_mixed_string_decimal(self):
# GH 11670
# possible bug when calculating mean of DataFrame?
Expand Down Expand Up @@ -731,7 +740,9 @@ def test_sum_corner(self):
tm.makePeriodIndex(0),
],
)
def test_axis_1_empty(self, all_reductions, index, using_array_manager):
def test_axis_1_empty(self, all_reductions, index, using_array_manager, request):
if all_reductions not in ("count", "any", "all"):
request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
df = DataFrame(columns=["a"], index=index)
result = getattr(df, all_reductions)(axis=1)
if all_reductions in ("any", "all"):
Expand Down Expand Up @@ -1464,6 +1475,7 @@ def test_preserve_timezone(self, initial: str, method):
result = getattr(df, method)(axis=1)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(reason="GH#51335")
@pytest.mark.parametrize("method", ["min", "max"])
def test_minmax_tzaware_skipna_axis_1(self, method, skipna):
# GH#51242
Expand Down Expand Up @@ -1671,9 +1683,10 @@ def test_prod_sum_min_count_mixed_object():

@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
@pytest.mark.parametrize("numeric_only", [True, False])
def test_reduction_axis_none_returns_scalar(method, numeric_only):
def test_reduction_axis_none_returns_scalar(method, numeric_only, request):
# GH#21597 As of 2.0, axis=None reduces over all axes.

if numeric_only:
request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
df = DataFrame(np.random.randn(4, 4))

result = getattr(df, method)(axis=None, numeric_only=numeric_only)
Expand Down