From 2afaaadbdbd8e1d3384860da155ab349a7b8504e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Apr 2020 09:45:02 +0200 Subject: [PATCH 1/4] INT: provide helpers for accessing the values of DataFrame columns --- pandas/core/frame.py | 23 +++++++++++++++++++++-- pandas/core/internals/managers.py | 9 +++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index be04bbc7942c4..b347a1603e56e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2568,6 +2568,21 @@ def _ixs(self, i: int, axis: int = 0): return result + def _ixs_values(self, i: int) -> Union[np.ndarray, ExtensionArray]: + """ + Get the values of the ith column (ndarray or ExtensionArray, as stored + in the Block) + """ + return self._data.iget_values(i) + + def _iter_arrays(self): + """ + Iterate over the arrays of all columns in order. + This returns the values as stored in the Block (ndarray or ExtensionArray). + """ + for i in range(len(self.columns)): + yield self._ixs_values(i) + def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) @@ -7926,8 +7941,12 @@ def _reduce( assert filter_type is None or filter_type == "bool", filter_type - dtype_is_dt = self.dtypes.apply( - lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x) + dtype_is_dt = np.array( + [ + is_datetime64_any_dtype(values.dtype) or is_period_dtype(values.dtype) + for values in self._iter_arrays() + ], + dtype=bool, ) if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any(): warnings.warn( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ebb4899c1ba9a..23911c9d4c20f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -31,6 +31,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject from pandas.core.construction import extract_array @@ -1001,6 +1002,14 @@ def iget(self, i: int) -> "SingleBlockManager": self.axes[1], ) + def iget_values(self, i: int) -> Union[np.ndarray, ExtensionArray]: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + return values + def delete(self, item): """ Delete selected item (items if non-unique) in-place. From 18203e802e1229765c4c20783d638d8745bf32f1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Apr 2020 21:21:57 +0200 Subject: [PATCH 2/4] add typing --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b347a1603e56e..7c8795ac8c07d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,6 +23,7 @@ FrozenSet, Hashable, Iterable, + Iterator, List, Optional, Sequence, @@ -2575,7 +2576,7 @@ def _ixs_values(self, i: int) -> Union[np.ndarray, ExtensionArray]: """ return self._data.iget_values(i) - def _iter_arrays(self): + def _iter_arrays(self) -> Iterator[Union[np.ndarray, ExtensionArray]]: """ Iterate over the arrays of all columns in order. This returns the values as stored in the Block (ndarray or ExtensionArray). From 8e29685cdccf12e28cd0fe3a9645301696e6e65c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Apr 2020 09:47:44 +0200 Subject: [PATCH 3/4] update type annotations + rename --- pandas/core/frame.py | 19 ++++++++++++++----- pandas/core/internals/managers.py | 3 +-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 038b8f08a3c03..078508af60fa2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -41,7 +41,16 @@ from pandas._config import get_option from pandas._libs import algos as libalgos, lib, properties -from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer +from pandas._typing import ( + ArrayLike, + Axes, + Axis, + Dtype, + FilePathOrBuffer, + Label, + Level, + Renamer, +) from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -2563,14 +2572,14 @@ def _ixs(self, i: int, axis: int = 0): return result - def _ixs_values(self, i: int) -> Union[np.ndarray, ExtensionArray]: + def _get_column_array(self, i: int) -> ArrayLike: """ - Get the values of the ith column (ndarray or ExtensionArray, as stored + Get the values of the i'th column (ndarray or ExtensionArray, as stored in the Block) """ return self._data.iget_values(i) - def _iter_arrays(self) -> Iterator[Union[np.ndarray, ExtensionArray]]: + def _iter_column_arrays(self) -> Iterator[ArrayLike]: """ Iterate over the arrays of all columns in order. This returns the values as stored in the Block (ndarray or ExtensionArray). @@ -7937,7 +7946,7 @@ def _reduce( dtype_is_dt = np.array( [ is_datetime64_any_dtype(values.dtype) or is_period_dtype(values.dtype) - for values in self._iter_arrays() + for values in self._iter_column_arrays() ], dtype=bool, ) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a93df86a8e553..e882dc132ac82 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -31,7 +31,6 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject from pandas.core.construction import extract_array @@ -1008,7 +1007,7 @@ def iget(self, i: int) -> "SingleBlockManager": self.axes[1], ) - def iget_values(self, i: int) -> Union[np.ndarray, ExtensionArray]: + def iget_values(self, i: int) -> ArrayLike: """ Return the data for column i as the values (ndarray or ExtensionArray). """ From f47223227cf419ec5aa6bb09ea6d3c9fef5bd4cb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Apr 2020 10:08:04 +0200 Subject: [PATCH 4/4] fixup rename --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 078508af60fa2..1f5640b90d243 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2585,7 +2585,7 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: This returns the values as stored in the Block (ndarray or ExtensionArray). """ for i in range(len(self.columns)): - yield self._ixs_values(i) + yield self._get_column_array(i) def __getitem__(self, key): key = lib.item_from_zerodim(key)