From f1097380e5878bce99eaafeb070e91c18249a04a Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 26 Nov 2013 08:42:47 -0500 Subject: [PATCH] PERF: perf enhancements in indexing/query/eval --- pandas/computation/align.py | 3 ++- pandas/computation/expr.py | 2 ++ pandas/core/frame.py | 6 +++--- pandas/core/generic.py | 2 +- pandas/core/indexing.py | 6 +++--- pandas/core/internals.py | 7 +++---- vb_suite/eval.py | 11 +++++++++++ 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 233f2b61dc463..b61169e1f55e0 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -151,7 +151,8 @@ def _align_core(terms): f = partial(ti.reindex_axis, reindexer, axis=axis, copy=False) - if pd.lib.is_bool_array(ti.values): + # need to fill if we have a bool dtype/array + if isinstance(ti, (np.ndarray, pd.Series)) and ti.dtype == object and pd.lib.is_bool_array(ti.values): r = f(fill_value=True) else: r = f() diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index bcb5570eae1fc..0baa596778996 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -183,6 +183,8 @@ def update(self, level=None): while sl >= 0: frame = frame.f_back sl -= 1 + if frame is None: + break frames.append(frame) for f in frames[::-1]: self.locals.update(f.f_locals) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 88cf898d354e9..5d658410a0e32 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3061,9 +3061,9 @@ def combine_first(self, other): Examples -------- a's values prioritized, use values from b to fill holes: - + >>> a.combine_first(b) - + Returns ------- @@ -3623,7 +3623,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', how : {'left', 'right', 'outer', 'inner'} How to handle indexes of the two objects. Default: 'left' for joining on index, None otherwise - + * left: use calling frame's index * right: use input frame's index * outer: form union of indexes diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5be71afc18663..5ab5e9063a2c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1092,7 +1092,7 @@ def take(self, indices, axis=0, convert=True): new_data = self._data.reindex_axis(new_items, indexer=indices, axis=0) else: - new_data = self._data.take(indices, axis=baxis) + new_data = self._data.take(indices, axis=baxis, verify=convert) return self._constructor(new_data)\ ._setitem_copy(True)\ .__finalize__(self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fcb9a82b96211..a258135597078 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -978,7 +978,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, typ='iloc') else: - return self.obj.take(indexer, axis=axis) + return self.obj.take(indexer, axis=axis, convert=False) class _IXIndexer(_NDFrameIndexer): @@ -1038,7 +1038,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, typ='iloc') else: - return self.obj.take(indexer, axis=axis) + return self.obj.take(indexer, axis=axis, convert=False) class _LocIndexer(_LocationIndexer): @@ -1195,7 +1195,7 @@ def _get_slice_axis(self, slice_obj, axis=0): return self._slice(slice_obj, axis=axis, raise_on_error=True, typ='iloc') else: - return self.obj.take(slice_obj, axis=axis) + return self.obj.take(slice_obj, axis=axis, convert=False) def _getitem_axis(self, key, axis=0): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 79667ecddc8a6..959d0186030cd 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3247,10 +3247,9 @@ def take(self, indexer, new_index=None, axis=1, verify=True): if verify: indexer = _maybe_convert_indices(indexer, n) - - if ((indexer == -1) | (indexer >= n)).any(): - raise Exception('Indices must be nonzero and less than ' - 'the axis length') + if ((indexer == -1) | (indexer >= n)).any(): + raise Exception('Indices must be nonzero and less than ' + 'the axis length') new_axes = list(self.axes) if new_index is None: diff --git a/vb_suite/eval.py b/vb_suite/eval.py index 506d00b8bf9f9..3b0efa9e88f48 100644 --- a/vb_suite/eval.py +++ b/vb_suite/eval.py @@ -139,3 +139,14 @@ query_datetime_index = Benchmark("df.query('index < ts')", index_setup, start_date=datetime(2013, 9, 27)) + +setup = setup + """ +N = 1000000 +df = DataFrame({'a': np.random.randn(N)}) +min_val = df['a'].min() +max_val = df['a'].max() +""" + +query_with_boolean_selection = Benchmark("df.query('(a >= min_val) & (a <= max_val)')", + index_setup, start_date=datetime(2013, 9, 27)) +