Skip to content

Commit 9a97677

Browse files
committed
getitem fastpath for slice
1 parent a0d538a commit 9a97677

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

pandas/_libs/arrays.pyx

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import numpy as np
77

88
cimport numpy as cnp
99
from cpython cimport PyErr_Clear
10+
from cpython.slice cimport PySlice_Unpack
1011
from libc.stdlib cimport (
1112
free,
1213
malloc,
@@ -468,13 +469,40 @@ cdef class BitmaskArray:
468469

469470
def __getitem__(self, key):
    """
    Return the element(s) of the bitmask selected by ``key``.

    Fast paths avoid materializing a NumPy array via ``to_numpy``:

    * an in-range, non-negative ``int`` on a 1-D mask returns a ``bool``
    * a null slice returns ``self.copy()``
    * a ``[0:stop:1]`` slice with ``stop > 0`` on a 1-D mask returns a new
      ``BitmaskArray`` holding a copy of the leading bits

    Every other key falls back to ``self.to_numpy()[key]``.
    """
    cdef Py_ssize_t ckey
    cdef Py_ssize_t start, stop, step
    cdef BitmaskArray bma
    cdef ArrowBitmap bitmap
    cdef int64_t nbytes
    cdef BitmaskArray self_ = self
    # to_numpy can be expensive, so try to avoid for simple cases
    if isinstance(key, int) and self.ndim == 1:
        ckey = key
        if ckey >= 0 and ckey < self.bitmap.size_bits:
            return bool(ArrowBitGet(self.bitmap.buffer.data, ckey))
    elif is_null_slice(key):
        return self.copy()
    elif isinstance(key, slice) and self.ndim == 1:
        # fastpath for slices that start at 0 and step 1 at a time
        # towards a positive number. Restricted to 1-D (like the integer
        # fastpath above) because the copy below treats the slice as a
        # prefix of the flat bit buffer.
        # TODO: upstream generic ArrowBitsGet function in nanoarrow
        PySlice_Unpack(key, &start, &stop, &step)
        # PySlice_Unpack does not clamp to the sequence length
        # (e.g. ``bma[0:]`` yields stop == PY_SSIZE_T_MAX), so bound the
        # stop here to avoid reading past the end of the source buffer.
        if stop > self_.bitmap.size_bits:
            stop = <Py_ssize_t>self_.bitmap.size_bits
        if start == 0 and stop > 0 and step == 1:
            bma = BitmaskArray.__new__(BitmaskArray)
            ArrowBitmapInit(&bitmap)
            nbytes = (stop + 7) // 8
            # ArrowBitmapReserve takes a number of *bits*, not bytes;
            # reserving ``stop`` bits guarantees room for ``nbytes`` bytes.
            # NOTE(review): the return code is not checked here - confirm
            # how allocation failure should be surfaced.
            ArrowBitmapReserve(&bitmap, stop)
            # NOTE(review): bits past ``stop`` in the final byte are copied
            # verbatim from the source - confirm no consumer reads them.
            memcpy(bitmap.buffer.data, self_.bitmap.buffer.data, nbytes)
            bitmap.buffer.size_bytes = nbytes
            bitmap.size_bits = stop

            bma.bitmap = bitmap
            bma.buffer_owner = True
            bma.ndim = self_.ndim
            # NOTE(review): shape/strides are taken from the source array
            # unchanged even though the result holds ``stop`` bits -
            # confirm whether they should be recomputed.
            bma.shape = self_.shape
            bma.strides = self_.strides
            bma.parent = False

            return bma

    return self.to_numpy()[key]
480508

pandas/tests/arrays/masked/test_bitmask.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,24 @@ def test_getitem_null_slice():
114114
result = bma[:]
115115

116116
assert not result.parent
117+
assert len(result) == 3
117118

118119
assert result.bytes[0] & 1 == 1
119120
assert (result.bytes[0] >> 1) & 1 == 0
120121
assert (result.bytes[0] >> 2) & 1 == 1
121122

122123

124+
def test_getitem_monotonic_slice():
    # A slice from 0 up to a positive stop with step 1 should return a
    # non-parent mask holding only the leading elements.
    source = BitmaskArray(np.array([True, False, True]))
    sliced = source[:2]

    assert not sliced.parent
    assert len(sliced) == 2

    # bit 0 is set (True) and bit 1 is clear (False)
    first_byte = sliced.bytes[0]
    assert first_byte & 1 == 1
    assert (first_byte >> 1) & 1 == 0
133+
134+
123135
@pytest.mark.parametrize(
124136
"indexer,expected",
125137
[

0 commit comments

Comments
 (0)