Patch summary (derived from the hunks below):

* .github/workflows/ci.yml: the ArrayManager job additionally runs
  pandas/tests/frame/test_reductions.py, pandas/tests/reductions/ and
  pandas/tests/generic/test_generic.py with --array-manager.
* pandas/core/internals/array_manager.py:
  - ArrayManager.reduce calls func(arr, axis=0) per column, re-raises
    TypeError unless ignore_failures is set (in which case the column is
    dropped), and returns the new manager plus an indexer of retained columns.
  - ArrayManager.get_bool_data also selects object-dtype columns that are
    all-bool.
  - ArrayManager.iset passes the value through ensure_wrapped_if_datetimelike.
* pandas/tests/reductions/test_reductions.py: test_timedelta_ops is decorated
  with td.skip_array_manager_not_yet_implemented (quantile TODO).

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 461363d295f6a..a2162f2e66b36 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -153,6 +153,9 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/frame/test_reductions.py --array-manager
+        pytest pandas/tests/reductions/ --array-manager
+        pytest pandas/tests/generic/test_generic.py --array-manager
         pytest pandas/tests/arithmetic/ --array-manager
         pytest pandas/tests/reshape/merge --array-manager
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index cd8d3e547abbd..97d1303824cd4 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -16,7 +16,10 @@
 
 import numpy as np
 
-from pandas._libs import lib
+from pandas._libs import (
+    NaT,
+    lib,
+)
 from pandas._typing import (
     ArrayLike,
     DtypeObj,
@@ -33,6 +36,8 @@
     is_dtype_equal,
     is_extension_array_dtype,
     is_numeric_dtype,
+    is_object_dtype,
+    is_timedelta64_ns_dtype,
 )
 from pandas.core.dtypes.dtypes import (
     ExtensionDtype,
@@ -50,7 +55,11 @@
 import pandas.core.algorithms as algos
 from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.sparse import SparseDtype
-from pandas.core.construction import extract_array
+from pandas.core.construction import (
+    ensure_wrapped_if_datetimelike,
+    extract_array,
+    sanitize_array,
+)
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.api import (
     Index,
@@ -201,18 +210,48 @@ def _verify_integrity(self) -> None:
     def reduce(
         self: T, func: Callable, ignore_failures: bool = False
     ) -> Tuple[T, np.ndarray]:
-        # TODO this still fails because `func` assumes to work on 2D arrays
-        # TODO implement ignore_failures
-        assert self.ndim == 2
+        """
+        Apply reduction function column-wise, returning a single-row ArrayManager.
 
-        res_arrays = []
-        for arr in self.arrays:
-            res = func(arr, axis=0)
-            res_arrays.append(np.array([res]))
+        Parameters
+        ----------
+        func : reduction function
+        ignore_failures : bool, default False
+            Whether to drop columns where func raises TypeError.
 
-        index = Index([None])  # placeholder
-        new_mgr = type(self)(res_arrays, [index, self.items])
-        indexer = np.arange(self.shape[0])
+        Returns
+        -------
+        ArrayManager
+        np.ndarray
+            Indexer of column indices that are retained.
+        """
+        result_arrays: List[np.ndarray] = []
+        result_indices: List[int] = []
+        for i, arr in enumerate(self.arrays):
+            try:
+                res = func(arr, axis=0)
+            except TypeError:
+                if not ignore_failures:
+                    raise
+            else:
+                # TODO NaT doesn't preserve dtype, so we need to ensure to create
+                # a timedelta result array if original was timedelta
+                # what if datetime results in timedelta? (eg std)
+                if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
+                    result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
+                else:
+                    result_arrays.append(sanitize_array([res], None))
+                result_indices.append(i)
+
+        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
+        if ignore_failures:
+            indexer = np.array(result_indices)
+            columns = self.items[result_indices]
+        else:
+            indexer = np.arange(self.shape[0])
+            columns = self.items
+
+        new_mgr = type(self)(result_arrays, [index, columns])
         return new_mgr, indexer
 
     def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
@@ -489,14 +528,17 @@ def _get_data_subset(self, predicate: Callable) -> ArrayManager:
 
     def get_bool_data(self, copy: bool = False) -> ArrayManager:
         """
-        Select columns that are bool-dtype.
+        Select columns that are bool-dtype and object-dtype columns that are all-bool.
 
         Parameters
         ----------
         copy : bool, default False
             Whether to copy the blocks
         """
-        return self._get_data_subset(lambda arr: is_bool_dtype(arr.dtype))
+        return self._get_data_subset(
+            lambda arr: is_bool_dtype(arr.dtype)
+            or (is_object_dtype(arr.dtype) and lib.is_bool_array(arr))
+        )
 
     def get_numeric_data(self, copy: bool = False) -> ArrayManager:
         """
@@ -693,6 +735,10 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
                 assert value.shape[1] == 1
                 value = value[0, :]
 
+            # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
+            # but we should avoid that and pass directly the proper array
+            value = ensure_wrapped_if_datetimelike(value)
+
             assert isinstance(value, (np.ndarray, ExtensionArray))
             assert value.ndim == 1
             assert len(value) == len(self._axes[0])
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index decff32baa970..e3145e0cc5c9f 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -291,6 +293,7 @@ def test_numpy_minmax_timedelta64(self):
         with pytest.raises(ValueError, match=errmsg):
             np.argmax(td, out=0)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
     def test_timedelta_ops(self):
         # GH#4984
         # make sure ops return Timedelta