Skip to content

Commit f109738

Browse files
committed
PERF: perf enhancements in indexing/query/eval
1 parent 66980c6 commit f109738

File tree

7 files changed

+25
-12
lines changed

7 files changed

+25
-12
lines changed

pandas/computation/align.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ def _align_core(terms):
151151
f = partial(ti.reindex_axis, reindexer, axis=axis,
152152
copy=False)
153153

154-
if pd.lib.is_bool_array(ti.values):
154+
# need to fill if we have a bool dtype/array
155+
if isinstance(ti, (np.ndarray, pd.Series)) and ti.dtype == object and pd.lib.is_bool_array(ti.values):
155156
r = f(fill_value=True)
156157
else:
157158
r = f()

pandas/computation/expr.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ def update(self, level=None):
183183
while sl >= 0:
184184
frame = frame.f_back
185185
sl -= 1
186+
if frame is None:
187+
break
186188
frames.append(frame)
187189
for f in frames[::-1]:
188190
self.locals.update(f.f_locals)

pandas/core/frame.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,9 +3061,9 @@ def combine_first(self, other):
30613061
Examples
30623062
--------
30633063
a's values prioritized, use values from b to fill holes:
3064-
3064+
30653065
>>> a.combine_first(b)
3066-
3066+
30673067
30683068
Returns
30693069
-------
@@ -3623,7 +3623,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
36233623
how : {'left', 'right', 'outer', 'inner'}
36243624
How to handle indexes of the two objects. Default: 'left'
36253625
for joining on index, None otherwise
3626-
3626+
36273627
* left: use calling frame's index
36283628
* right: use input frame's index
36293629
* outer: form union of indexes

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1092,7 +1092,7 @@ def take(self, indices, axis=0, convert=True):
10921092
new_data = self._data.reindex_axis(new_items, indexer=indices,
10931093
axis=0)
10941094
else:
1095-
new_data = self._data.take(indices, axis=baxis)
1095+
new_data = self._data.take(indices, axis=baxis, verify=convert)
10961096
return self._constructor(new_data)\
10971097
._setitem_copy(True)\
10981098
.__finalize__(self)

pandas/core/indexing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
978978
if isinstance(indexer, slice):
979979
return self._slice(indexer, axis=axis, typ='iloc')
980980
else:
981-
return self.obj.take(indexer, axis=axis)
981+
return self.obj.take(indexer, axis=axis, convert=False)
982982

983983

984984
class _IXIndexer(_NDFrameIndexer):
@@ -1038,7 +1038,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
10381038
if isinstance(indexer, slice):
10391039
return self._slice(indexer, axis=axis, typ='iloc')
10401040
else:
1041-
return self.obj.take(indexer, axis=axis)
1041+
return self.obj.take(indexer, axis=axis, convert=False)
10421042

10431043

10441044
class _LocIndexer(_LocationIndexer):
@@ -1195,7 +1195,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
11951195
return self._slice(slice_obj, axis=axis, raise_on_error=True,
11961196
typ='iloc')
11971197
else:
1198-
return self.obj.take(slice_obj, axis=axis)
1198+
return self.obj.take(slice_obj, axis=axis, convert=False)
11991199

12001200
def _getitem_axis(self, key, axis=0):
12011201

pandas/core/internals.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3247,10 +3247,9 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
32473247

32483248
if verify:
32493249
indexer = _maybe_convert_indices(indexer, n)
3250-
3251-
if ((indexer == -1) | (indexer >= n)).any():
3252-
raise Exception('Indices must be nonzero and less than '
3253-
'the axis length')
3250+
if ((indexer == -1) | (indexer >= n)).any():
3251+
raise Exception('Indices must be nonzero and less than '
3252+
'the axis length')
32543253

32553254
new_axes = list(self.axes)
32563255
if new_index is None:

vb_suite/eval.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,14 @@
139139

140140
query_datetime_index = Benchmark("df.query('index < ts')",
141141
index_setup, start_date=datetime(2013, 9, 27))
142+
143+
setup = setup + """
144+
N = 1000000
145+
df = DataFrame({'a': np.random.randn(N)})
146+
min_val = df['a'].min()
147+
max_val = df['a'].max()
148+
"""
149+
150+
query_with_boolean_selection = Benchmark("df.query('(a >= min_val) & (a <= max_val)')",
151+
index_setup, start_date=datetime(2013, 9, 27))
152+

0 commit comments

Comments
 (0)