Skip to content

CI: 3.10-dev tests failing #44126

Closed
@jreback

Description

@jreback

eg.. https://github.com/pandas-dev/pandas/pull/44125/checks?check_run_id=3958010839

traceback is large

maybe something changed on the numpy side
cc @seberg
cc @pandas-dev/pandas-core

pandas/tests/frame/test_reductions.py:570: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self =         foo       bar       baz
0       100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2  1.633849  0.952529 -0.082802
3  -0.22671  0.524945  2.436406
4 -0.484995  0.845172 -2.276719
axis = 1, skipna = True, level = None, ddof = 1, numeric_only = False
kwargs = {}

    @doc(
        _num_ddof_doc,
        desc="Return sample standard deviation over requested axis."
        "\n\nNormalized by N-1 by default. This can be changed using the "
        "ddof argument",
        name1=name1,
        name2=name2,
        axis_descr=axis_descr,
        notes=_std_notes,
    )
    def std(
        self,
        axis=None,
        skipna=True,
        level=None,
        ddof=1,
        numeric_only=None,
        **kwargs,
    ):
>       return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs)

pandas/core/generic.py:10703: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self =         foo       bar       baz
0       100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2  1.633849  0.952529 -0.082802
3  -0.22671  0.524945  2.436406
4 -0.484995  0.845172 -2.276719
axis = 1, skipna = True, level = None, ddof = 1, numeric_only = False
kwargs = {}

    def std(
        self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None, **kwargs
    ):
>       return self._stat_function_ddof(
            "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs
        )

pandas/core/generic.py:10396: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self =         foo       bar       baz
0       100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2  1.633849  0.952529 -0.082802
3  -0.22671  0.524945  2.436406
4 -0.484995  0.845172 -2.276719
name = 'std', func = <function nanstd at 0x10e9e5750>, axis = 1, skipna = True
level = None, ddof = 1, numeric_only = False, kwargs = {}

    @final
    def _stat_function_ddof(
        self,
        name: str,
        func,
        axis=None,
        skipna=True,
        level=None,
        ddof=1,
        numeric_only=None,
        **kwargs,
    ):
        nv.validate_stat_ddof_func((), kwargs, fname=name)
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            warnings.warn(
                "Using the level keyword in DataFrame and Series aggregations is "
                "deprecated and will be removed in a future version. Use groupby "
                "instead. df.var(level=1) should use df.groupby(level=1).var().",
                FutureWarning,
                stacklevel=4,
            )
            return self._agg_by_level(
                name, axis=axis, level=level, skipna=skipna, ddof=ddof
            )
>       return self._reduce(
            func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
        )

pandas/core/generic.py:10375: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self =         foo       bar       baz
0       100 -0.699818 -0.060793
1 -0.008426 -0.982277 -0.177994
2  1.633849  0.952529 -0.082802
3  -0.22671  0.524945  2.436406
4 -0.484995  0.845172 -2.276719
op = <function nanstd at 0x10e9e5750>, name = 'std', axis = 0, skipna = True
numeric_only = False, filter_type = None, kwds = {'ddof': 1}, out_dtype = None

    def _reduce(
        self,
        op,
        name: str,
        *,
        axis: Axis = 0,
        skipna: bool = True,
        numeric_only: bool | None = None,
        filter_type=None,
        **kwds,
    ):
    
        assert filter_type is None or filter_type == "bool", filter_type
        out_dtype = "bool" if filter_type == "bool" else None
    
        if numeric_only is None and name in ["mean", "median"]:
            own_dtypes = [arr.dtype for arr in self._mgr.arrays]
    
            dtype_is_dt = np.array(
                [is_datetime64_any_dtype(dtype) for dtype in own_dtypes],
                dtype=bool,
            )
            if dtype_is_dt.any():
                warnings.warn(
                    "DataFrame.mean and DataFrame.median with numeric_only=None "
                    "will include datetime64 and datetime64tz columns in a "
                    "future version.",
                    FutureWarning,
                    stacklevel=5,
                )
                # Non-copy equivalent to
                #  dt64_cols = self.dtypes.apply(is_datetime64_any_dtype)
                #  cols = self.columns[~dt64_cols]
                #  self = self[cols]
                predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
                mgr = self._mgr._get_data_subset(predicate)
                self = type(self)(mgr)
    
        # TODO: Make other agg func handle axis=None properly GH#21597
        axis = self._get_axis_number(axis)
        labels = self._get_agg_axis(axis)
        assert axis in [0, 1]
    
        def func(values: np.ndarray):
            # We only use this in the case that operates on self.values
            return op(values, axis=axis, skipna=skipna, **kwds)
    
        def blk_func(values, axis=1):
            if isinstance(values, ExtensionArray):
                if not is_1d_only_ea_obj(values) and not isinstance(
                    self._mgr, ArrayManager
                ):
                    return values._reduce(name, axis=1, skipna=skipna, **kwds)
                return values._reduce(name, skipna=skipna, **kwds)
            else:
                return op(values, axis=axis, skipna=skipna, **kwds)
    
        def _get_data() -> DataFrame:
            if filter_type is None:
                data = self._get_numeric_data()
            else:
                # GH#25101, GH#24434
                assert filter_type == "bool"
                data = self._get_bool_data()
            return data
    
        if numeric_only is not None or axis == 0:
            # For numeric_only non-None and axis non-None, we know
            #  which blocks to use and no try/except is needed.
            #  For numeric_only=None only the case with axis==0 and no object
            #  dtypes are unambiguous can be handled with BlockManager.reduce
            # Case with EAs see GH#35881
            df = self
            if numeric_only is True:
                df = _get_data()
            if axis == 1:
                df = df.T
                axis = 0
    
            ignore_failures = numeric_only is None
    
            # After possibly _get_data and transposing, we are now in the
            #  simple case where we can use BlockManager.reduce
>           res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)

pandas/core/frame.py:9998: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = BlockManager
Items: RangeIndex(start=0, stop=5, step=1)
Axis 1: Index(['foo', 'bar', 'baz'], dtype='object')
ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
func = <function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>
ignore_failures = False

    def reduce(
        self: T, func: Callable, ignore_failures: bool = False
    ) -> tuple[T, np.ndarray]:
        """
        Apply reduction function blockwise, returning a single-row BlockManager.
    
        Parameters
        ----------
        func : reduction function
        ignore_failures : bool, default False
            Whether to drop blocks where func raises TypeError.
    
        Returns
        -------
        BlockManager
        np.ndarray
            Indexer of mgr_locs that are retained.
        """
        # If 2D, we assume that we're operating column-wise
        assert self.ndim == 2
    
        res_blocks: list[Block] = []
        for blk in self.blocks:
>           nbs = blk.reduce(func, ignore_failures)

pandas/core/internals/managers.py:1309: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
args = (<function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>, False)
kwargs = {}

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
    
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
>           return self.split_and_operate(meth, *args, **kwargs)

pandas/core/internals/blocks.py:137: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = ObjectBlock: slice(0, 5, 1), 5 x 3, dtype: object
func = <function ObjectBlock.reduce at 0x10f2e5510>
args = (<function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>, False)
kwargs = {}, res_blocks = []
nb = ObjectBlock: slice(0, 1, 1), 1 x 3, dtype: object

    @final
    def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
        """
        Split the block and apply func column-by-column.
    
        Parameters
        ----------
        func : Block method
        *args
        **kwargs
    
        Returns
        -------
        List[Block]
        """
        assert self.ndim == 2 and self.shape[0] != 1
    
        res_blocks = []
        for nb in self._split():
>           rbs = func(nb, *args, **kwargs)

pandas/core/internals/blocks.py:495: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = ObjectBlock: slice(0, 1, 1), 1 x 3, dtype: object
func = <function DataFrame._reduce.<locals>.blk_func at 0x1474ce8c0>
ignore_failures = False

    @maybe_split
    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        """
        For object-dtype, we operate column-wise.
        """
        assert self.ndim == 2
    
        try:
>           res = func(self.values)

pandas/core/internals/blocks.py:1827: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1

    def blk_func(values, axis=1):
        if isinstance(values, ExtensionArray):
            if not is_1d_only_ea_obj(values) and not isinstance(
                self._mgr, ArrayManager
            ):
                return values._reduce(name, axis=1, skipna=skipna, **kwds)
            return values._reduce(name, skipna=skipna, **kwds)
        else:
>           return op(values, axis=axis, skipna=skipna, **kwds)

pandas/core/frame.py:9970: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, skipna = True, kwds = {'ddof': 1}, k = 'ddof', v = 1

    @functools.wraps(alt)
    def f(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        **kwds,
    ):
        if len(self.kwargs) > 0:
            for k, v in self.kwargs.items():
                if k not in kwds:
                    kwds[k] = v
    
        if values.size == 0 and kwds.get("min_count") is None:
            # We are empty, returning NA for our type
            # Only applies for the default `min_count` of None
            # since that affects how empty arrays are handled.
            # TODO(GH-18976) update all the nanops methods to
            # correctly handle empty inputs and remove this check.
            # It *may* just be `var`
            return _na_for_min_count(values, axis)
    
        if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
            if kwds.get("mask", None) is None:
                # `mask` is not recognised by bottleneck, would raise
                #  TypeError if called
                kwds.pop("mask", None)
                result = bn_func(values, axis=axis, **kwds)
    
                # prefer to treat inf/-inf as NA, but must compute the func
                # twice :(
                if _has_infs(result):
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
>           result = alt(values, axis=axis, skipna=skipna, **kwds)

pandas/core/nanops.py:155: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)

    @bottleneck_switch(ddof=1)
    def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None):
        """
        Compute the standard deviation along given axis while ignoring NaNs
    
        Parameters
        ----------
        values : ndarray
        axis : int, optional
        skipna : bool, default True
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
            where N represents the number of elements.
        mask : ndarray[bool], optional
            nan-mask if known
    
        Returns
        -------
        result : float
            Unless input is a float array, in which case use the same
            precision as the input array.
    
        Examples
        --------
        >>> import pandas.core.nanops as nanops
        >>> s = pd.Series([1, np.nan, 2, 3])
        >>> nanops.nanstd(s)
        1.0
        """
        if values.dtype == "M8[ns]":
            values = values.view("m8[ns]")
    
        orig_dtype = values.dtype
        values, mask, _, _, _ = _get_values(values, skipna, mask=mask)
    
>       result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))

pandas/core/nanops.py:897: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

args = (array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object),)
kwargs = {'axis': 1, 'ddof': 1, 'mask': array([[False, False, False]]), 'skipna': True}
obj_iter = <itertools.chain object at 0x146c93820>

    @functools.wraps(f)
    def _f(*args, **kwargs):
        obj_iter = itertools.chain(args, kwargs.values())
        if any(self.check(obj) for obj in obj_iter):
            f_name = f.__name__.replace("nan", "")
            raise TypeError(
                f"reduction operation '{f_name}' not allowed for this dtype"
            )
        try:
            with np.errstate(invalid="ignore"):
>               return f(*args, **kwargs)

pandas/core/nanops.py:93: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, skipna = True
kwds = {'ddof': 1, 'mask': array([[False, False, False]])}, k = 'ddof', v = 1

    @functools.wraps(alt)
    def f(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        **kwds,
    ):
        if len(self.kwargs) > 0:
            for k, v in self.kwargs.items():
                if k not in kwds:
                    kwds[k] = v
    
        if values.size == 0 and kwds.get("min_count") is None:
            # We are empty, returning NA for our type
            # Only applies for the default `min_count` of None
            # since that affects how empty arrays are handled.
            # TODO(GH-18976) update all the nanops methods to
            # correctly handle empty inputs and remove this check.
            # It *may* just be `var`
            return _na_for_min_count(values, axis)
    
        if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
            if kwds.get("mask", None) is None:
                # `mask` is not recognised by bottleneck, would raise
                #  TypeError if called
                kwds.pop("mask", None)
                result = bn_func(values, axis=axis, **kwds)
    
                # prefer to treat inf/-inf as NA, but must compute the func
                # twice :(
                if _has_infs(result):
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
>           result = alt(values, axis=axis, skipna=skipna, **kwds)

pandas/core/nanops.py:155: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)

    @disallow("M8", "m8")
    @bottleneck_switch(ddof=1)
    def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
        """
        Compute the variance along given axis while ignoring NaNs
    
        Parameters
        ----------
        values : ndarray
        axis : int, optional
        skipna : bool, default True
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
            where N represents the number of elements.
        mask : ndarray[bool], optional
            nan-mask if known
    
        Returns
        -------
        result : float
            Unless input is a float array, in which case use the same
            precision as the input array.
    
        Examples
        --------
        >>> import pandas.core.nanops as nanops
        >>> s = pd.Series([1, np.nan, 2, 3])
        >>> nanops.nanvar(s)
        1.0
        """
        values = extract_array(values, extract_numpy=True)
        dtype = values.dtype
        mask = _maybe_get_mask(values, skipna, mask)
        if is_any_int_dtype(dtype):
            values = values.astype("f8")
            if mask is not None:
                values[mask] = np.nan
    
        if is_float_dtype(values.dtype):
            count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
        else:
            count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)
    
        if skipna and mask is not None:
            values = values.copy()
            np.putmask(values, mask, 0)
    
        # xref GH10242
        # Compute variance via two-pass algorithm, which is stable against
        # cancellation errors and relatively accurate for small numbers of
        # observations.
        #
        # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
>       avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count

pandas/core/nanops.py:954: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

a = array([['100', -0.6998177032455032, -0.06079253108280515]], dtype=object)
axis = 1, dtype = <class 'numpy.float64'>, out = None, keepdims = False
initial = <no value>, where = True

    def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
             initial=_NoValue, where=True):
>       return umr_sum(a, axis, dtype, out, keepdims, initial, where)
E       TypeError: No loop matching the specified signature and casting was found for ufunc add

Metadata

Assignees

No one assigned

    Labels

    CI (Continuous Integration) · Closing Candidate (may be closeable, needs more eyeballs) · Python 3.10 · Testing (pandas testing functions or related to the test suite)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions