Closed
Description
e.g. https://github.com/pandas-dev/pandas/pull/44125/checks?check_run_id=3958010839
The traceback is large.
Perhaps something changed recently on the NumPy side.
cc @seberg
cc @pandas-dev/pandas-core
pandas/tests/frame/test_reductions.py:570:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = foo bar baz
0 100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2 1.633849 0.952529 -0.082802
3 -0.22671 0.524945 2.436406
4 -0.484995 0.845172 -2.276719
axis = 1, skipna = True, level = None, ddof = 1, numeric_only = False
kwargs = {}
@doc(
_num_ddof_doc,
desc="Return sample standard deviation over requested axis."
"\n\nNormalized by N-1 by default. This can be changed using the "
"ddof argument",
name1=name1,
name2=name2,
axis_descr=axis_descr,
notes=_std_notes,
)
def std(
self,
axis=None,
skipna=True,
level=None,
ddof=1,
numeric_only=None,
**kwargs,
):
> return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs)
pandas/core/generic.py:10703:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = foo bar baz
0 100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2 1.633849 0.952529 -0.082802
3 -0.22671 0.524945 2.436406
4 -0.484995 0.845172 -2.276719
axis = 1, skipna = True, level = None, ddof = 1, numeric_only = False
kwargs = {}
def std(
self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None, **kwargs
):
> return self._stat_function_ddof(
"std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs
)
pandas/core/generic.py:10396:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = foo bar baz
0 100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2 1.633849 0.952529 -0.082802
3 -0.22671 0.524945 2.436406
4 -0.484995 0.845172 -2.276719
name = 'std', func = <function nanstd at 0x10e9e5750>, axis = 1, skipna = True
level = None, ddof = 1, numeric_only = False, kwargs = {}
@final
def _stat_function_ddof(
self,
name: str,
func,
axis=None,
skipna=True,
level=None,
ddof=1,
numeric_only=None,
**kwargs,
):
nv.validate_stat_ddof_func((), kwargs, fname=name)
if axis is None:
axis = self._stat_axis_number
if level is not None:
warnings.warn(
"Using the level keyword in DataFrame and Series aggregations is "
"deprecated and will be removed in a future version. Use groupby "
"instead. df.var(level=1) should use df.groupby(level=1).var().",
FutureWarning,
stacklevel=4,
)
return self._agg_by_level(
name, axis=axis, level=level, skipna=skipna, ddof=ddof
)
> return self._reduce(
func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
)
pandas/core/generic.py:10375:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = foo bar baz
0 100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2 1.633849 0.952529 -0.082802
3 -0.22671 0.524945 2.436406
4 -0.484995 0.845172 -2.276719
op = <function nanstd at 0x10e9e5750>, name = 'std', axis = 0, skipna = True
numeric_only = False, filter_type = None, kwds = {'ddof': 1}, out_dtype = None
def _reduce(
self,
op,
name: str,
*,
axis: Axis = 0,
skipna: bool = True,
numeric_only: bool | None = None,
filter_type=None,
**kwds,
):
assert filter_type is None or filter_type == "bool", filter_type
out_dtype = "bool" if filter_type == "bool" else None
if numeric_only is None and name in ["mean", "median"]:
own_dtypes = [arr.dtype for arr in self._mgr.arrays]
dtype_is_dt = np.array(
[is_datetime64_any_dtype(dtype) for dtype in own_dtypes],
dtype=bool,
)
if dtype_is_dt.any():
warnings.warn(
"DataFrame.mean and DataFrame.median with numeric_only=None "
"will include datetime64 and datetime64tz columns in a "
"future version.",
FutureWarning,
stacklevel=5,
)
# Non-copy equivalent to
# dt64_cols = self.dtypes.apply(is_datetime64_any_dtype)
# cols = self.columns[~dt64_cols]
# self = self[cols]
predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
mgr = self._mgr._get_data_subset(predicate)
self = type(self)(mgr)
# TODO: Make other agg func handle axis=None properly GH#21597
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
assert axis in [0, 1]
def func(values: np.ndarray):
# We only use this in the case that operates on self.values
return op(values, axis=axis, skipna=skipna, **kwds)
def blk_func(values, axis=1):
if isinstance(values, ExtensionArray):
if not is_1d_only_ea_obj(values) and not isinstance(
self._mgr, ArrayManager
):
return values._reduce(name, axis=1, skipna=skipna, **kwds)
return values._reduce(name, skipna=skipna, **kwds)
else:
return op(values, axis=axis, skipna=skipna, **kwds)
def _get_data() -> DataFrame:
if filter_type is None:
data = self._get_numeric_data()
else:
# GH#25101, GH#24434
assert filter_type == "bool"
data = self._get_bool_data()
return data
if numeric_only is not None or axis == 0:
# For numeric_only non-None and axis non-None, we know
# which blocks to use and no try/except is needed.
# For numeric_only=None only the case with axis==0 and no object
# dtypes are unambiguous can be handled with BlockManager.reduce
# Case with EAs see GH#35881
df = self
if numeric_only is True:
df = _get_data()
if axis == 1:
df = df.T
axis = 0
ignore_failures = numeric_only is None
# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
> res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
pandas/core/frame.py:9998:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = BlockManager
Items: RangeIndex(start=0, stop=5, step=1)
Axis 1: Index(['foo', 'bar', 'baz'], dtype='object')
ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
func = <function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>
ignore_failures = False
def reduce(
self: T, func: Callable, ignore_failures: bool = False
) -> tuple[T, np.ndarray]:
"""
Apply reduction function blockwise, returning a single-row BlockManager.
Parameters
----------
func : reduction function
ignore_failures : bool, default False
Whether to drop blocks where func raises TypeError.
Returns
-------
BlockManager
np.ndarray
Indexer of mgr_locs that are retained.
"""
# If 2D, we assume that we're operating column-wise
assert self.ndim == 2
res_blocks: list[Block] = []
for blk in self.blocks:
> nbs = blk.reduce(func, ignore_failures)
pandas/core/internals/managers.py:1309:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
args = (<function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>, False)
kwargs = {}
@wraps(meth)
def newfunc(self, *args, **kwargs) -> list[Block]:
if self.ndim == 1 or self.shape[0] == 1:
return meth(self, *args, **kwargs)
else:
# Split and operate column-by-column
> return self.split_and_operate(meth, *args, **kwargs)
pandas/core/internals/blocks.py:137:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
func = <function ObjectBlock.reduce at 0x10f2e5510>
args = (<function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>, False)
kwargs = {}, res_blocks = []
nb = ObjectBlock: slice(0, 1, 1), 1 x 3, dtype: object
@final
def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
"""
Split the block and apply func column-by-column.
Parameters
----------
func : Block method
*args
**kwargs
Returns
-------
List[Block]
"""
assert self.ndim == 2 and self.shape[0] != 1
res_blocks = []
for nb in self._split():
> rbs = func(nb, *args, **kwargs)
pandas/core/internals/blocks.py:495:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = ObjectBlock: slice(0, 1, 1), 1 x 3, dtype: object
func = <function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>
ignore_failures = False
@maybe_split
def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
"""
For object-dtype, we operate column-wise.
"""
assert self.ndim == 2
try:
> res = func(self.values)
pandas/core/internals/blocks.py:1827:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1
def blk_func(values, axis=1):
if isinstance(values, ExtensionArray):
if not is_1d_only_ea_obj(values) and not isinstance(
self._mgr, ArrayManager
):
return values._reduce(name, axis=1, skipna=skipna, **kwds)
return values._reduce(name, skipna=skipna, **kwds)
else:
> return op(values, axis=axis, skipna=skipna, **kwds)
pandas/core/frame.py:9970:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, skipna = True, kwds = {'ddof': 1}, k = 'ddof', v = 1
@functools.wraps(alt)
def f(
values: np.ndarray,
*,
axis: int | None = None,
skipna: bool = True,
**kwds,
):
if len(self.kwargs) > 0:
for k, v in self.kwargs.items():
if k not in kwds:
kwds[k] = v
if values.size == 0 and kwds.get("min_count") is None:
# We are empty, returning NA for our type
# Only applies for the default `min_count` of None
# since that affects how empty arrays are handled.
# TODO(GH-18976) update all the nanops methods to
# correctly handle empty inputs and remove this check.
# It *may* just be `var`
return _na_for_min_count(values, axis)
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
if kwds.get("mask", None) is None:
# `mask` is not recognised by bottleneck, would raise
# TypeError if called
kwds.pop("mask", None)
result = bn_func(values, axis=axis, **kwds)
# prefer to treat inf/-inf as NA, but must compute the func
# twice :(
if _has_infs(result):
result = alt(values, axis=axis, skipna=skipna, **kwds)
else:
result = alt(values, axis=axis, skipna=skipna, **kwds)
else:
> result = alt(values, axis=axis, skipna=skipna, **kwds)
pandas/core/nanops.py:155:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
@bottleneck_switch(ddof=1)
def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None):
"""
Compute the standard deviation along given axis while ignoring NaNs
Parameters
----------
values : ndarray
axis : int, optional
skipna : bool, default True
ddof : int, default 1
Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
mask : ndarray[bool], optional
nan-mask if known
Returns
-------
result : float
Unless input is a float array, in which case use the same
precision as the input array.
Examples
--------
>>> import pandas.core.nanops as nanops
>>> s = pd.Series([1, np.nan, 2, 3])
>>> nanops.nanstd(s)
1.0
"""
if values.dtype == "M8[ns]":
values = values.view("m8[ns]")
orig_dtype = values.dtype
values, mask, _, _, _ = _get_values(values, skipna, mask=mask)
> result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
pandas/core/nanops.py:897:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = (array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object),)
kwargs = {'axis': 1, 'ddof': 1, 'mask': array([[False, False, False]]), 'skipna': True}
obj_iter = <itertools.chain object at 0x146c93820>
@functools.wraps(f)
def _f(*args, **kwargs):
obj_iter = itertools.chain(args, kwargs.values())
if any(self.check(obj) for obj in obj_iter):
f_name = f.__name__.replace("nan", "")
raise TypeError(
f"reduction operation '{f_name}' not allowed for this dtype"
)
try:
with np.errstate(invalid="ignore"):
> return f(*args, **kwargs)
pandas/core/nanops.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, skipna = True
kwds = {'ddof': 1, 'mask': array([[False, False, False]])}, k = 'ddof', v = 1
@functools.wraps(alt)
def f(
values: np.ndarray,
*,
axis: int | None = None,
skipna: bool = True,
**kwds,
):
if len(self.kwargs) > 0:
for k, v in self.kwargs.items():
if k not in kwds:
kwds[k] = v
if values.size == 0 and kwds.get("min_count") is None:
# We are empty, returning NA for our type
# Only applies for the default `min_count` of None
# since that affects how empty arrays are handled.
# TODO(GH-18976) update all the nanops methods to
# correctly handle empty inputs and remove this check.
# It *may* just be `var`
return _na_for_min_count(values, axis)
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
if kwds.get("mask", None) is None:
# `mask` is not recognised by bottleneck, would raise
# TypeError if called
kwds.pop("mask", None)
result = bn_func(values, axis=axis, **kwds)
# prefer to treat inf/-inf as NA, but must compute the func
# twice :(
if _has_infs(result):
result = alt(values, axis=axis, skipna=skipna, **kwds)
else:
result = alt(values, axis=axis, skipna=skipna, **kwds)
else:
> result = alt(values, axis=axis, skipna=skipna, **kwds)
pandas/core/nanops.py:155:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
"""
Compute the variance along given axis while ignoring NaNs
Parameters
----------
values : ndarray
axis : int, optional
skipna : bool, default True
ddof : int, default 1
Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
mask : ndarray[bool], optional
nan-mask if known
Returns
-------
result : float
Unless input is a float array, in which case use the same
precision as the input array.
Examples
--------
>>> import pandas.core.nanops as nanops
>>> s = pd.Series([1, np.nan, 2, 3])
>>> nanops.nanvar(s)
1.0
"""
values = extract_array(values, extract_numpy=True)
dtype = values.dtype
mask = _maybe_get_mask(values, skipna, mask)
if is_any_int_dtype(dtype):
values = values.astype("f8")
if mask is not None:
values[mask] = np.nan
if is_float_dtype(values.dtype):
count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
else:
count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)
if skipna and mask is not None:
values = values.copy()
np.putmask(values, mask, 0)
# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
> avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
pandas/core/nanops.py:954:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
a = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, dtype = <class 'numpy.float64'>, out = None, keepdims = False
initial = <no value>, where = True
def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
initial=_NoValue, where=True):
> return umr_sum(a, axis, dtype, out, keepdims, initial, where)
E TypeError: No loop matching the specified signature and casting was found for ufunc add