Skip to content

Commit bc376de

Browse files
jbrockmendel authored and yeshsurya committed
PERF: put BlockManager constructor in cython (pandas-dev#40842)
1 parent 7abd259 commit bc376de

File tree

6 files changed

+83
-143
lines changed

6 files changed

+83
-143
lines changed

pandas/_libs/internals.pyi

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,5 +79,3 @@ class BlockManager:
7979
_blklocs: np.ndarray
8080

8181
def __init__(self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=True): ...
82-
83-
def get_slice(self: T, slobj: slice, axis: int=...) -> T: ...

pandas/_libs/internals.pyx

Lines changed: 2 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -372,9 +372,7 @@ cdef slice indexer_as_slice(intp_t[:] vals):
372372

373373
@cython.boundscheck(False)
374374
@cython.wraparound(False)
375-
def get_blkno_indexers(
376-
int64_t[:] blknos, bint group=True
377-
) -> list[tuple[int, slice | np.ndarray]]:
375+
def get_blkno_indexers(int64_t[:] blknos, bint group=True):
378376
"""
379377
Enumerate contiguous runs of integers in ndarray.
380378
@@ -517,7 +515,7 @@ cdef class NumpyBlock(SharedBlock):
517515
self.values = values
518516

519517
# @final # not useful in cython, but we _would_ annotate with @final
520-
cpdef NumpyBlock getitem_block_index(self, slice slicer):
518+
def getitem_block_index(self, slicer: slice) -> NumpyBlock:
521519
"""
522520
Perform __getitem__-like specialized to slicing along index.
523521

@@ -612,30 +610,3 @@ cdef class BlockManager:
612610
self._rebuild_blknos_and_blklocs()
613611

614612
# -------------------------------------------------------------------
615-
# Indexing
616-
617-
cdef BlockManager _get_index_slice(self, slobj):
618-
cdef:
619-
SharedBlock blk, nb
620-
621-
nbs = []
622-
for blk in self.blocks:
623-
nb = blk.getitem_block_index(slobj)
624-
nbs.append(nb)
625-
626-
new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
627-
return type(self)(tuple(nbs), new_axes, verify_integrity=False)
628-
629-
def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
630-
631-
if axis == 0:
632-
new_blocks = self._slice_take_blocks_ax0(slobj)
633-
elif axis == 1:
634-
return self._get_index_slice(slobj)
635-
else:
636-
raise IndexError("Requested axis not found in manager")
637-
638-
new_axes = list(self.axes)
639-
new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
640-
641-
return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)

pandas/_libs/reduction.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -489,6 +489,6 @@ cdef class BlockSlider:
489489
Ensure that we have the original blocks, blknos, and blklocs.
490490
"""
491491
mgr = self.dummy._mgr
492-
mgr.blocks = self.blocks
492+
mgr.blocks = tuple(self.blocks)
493493
mgr._blklocs = self.orig_blklocs
494494
mgr._blknos = self.orig_blknos

pandas/compat/pickle_compat.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
PeriodArray,
2222
TimedeltaArray,
2323
)
24+
from pandas.core.internals import BlockManager
2425

2526
if TYPE_CHECKING:
2627
from pandas import (
@@ -222,7 +223,8 @@ def load_newobj(self):
222223
elif issubclass(cls, TimedeltaArray) and not args:
223224
arr = np.array([], dtype="m8[ns]")
224225
obj = cls.__new__(cls, arr, arr.dtype)
225-
226+
elif cls is BlockManager and not args:
227+
obj = cls.__new__(cls, (), [], False)
226228
else:
227229
obj = cls.__new__(cls, *args)
228230

pandas/core/internals/managers.py

Lines changed: 55 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -135,22 +135,16 @@ class BaseBlockManager(DataManager):
135135
This is *not* a public API class
136136
"""
137137

138-
__slots__ = [
139-
"axes",
140-
"blocks",
141-
"_known_consolidated",
142-
"_is_consolidated",
143-
"_blknos",
144-
"_blklocs",
145-
]
138+
__slots__ = ()
146139

147140
_blknos: np.ndarray
148141
_blklocs: np.ndarray
149142
blocks: tuple[Block, ...]
150143
axes: list[Index]
151144

152-
# Non-trivially faster than a property
153145
ndim: int
146+
_known_consolidated: bool
147+
_is_consolidated: bool
154148

155149
def __init__(self, blocks, axes, verify_integrity=True):
156150
raise NotImplementedError
@@ -276,57 +270,6 @@ def arrays(self) -> list[ArrayLike]:
276270
"""
277271
return [blk.values for blk in self.blocks]
278272

279-
def __getstate__(self):
280-
block_values = [b.values for b in self.blocks]
281-
block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
282-
axes_array = list(self.axes)
283-
284-
extra_state = {
285-
"0.14.1": {
286-
"axes": axes_array,
287-
"blocks": [
288-
{"values": b.values, "mgr_locs": b.mgr_locs.indexer}
289-
for b in self.blocks
290-
],
291-
}
292-
}
293-
294-
# First three elements of the state are to maintain forward
295-
# compatibility with 0.13.1.
296-
return axes_array, block_values, block_items, extra_state
297-
298-
def __setstate__(self, state):
299-
def unpickle_block(values, mgr_locs, ndim: int) -> Block:
300-
# TODO(EA2D): ndim would be unnecessary with 2D EAs
301-
# older pickles may store e.g. DatetimeIndex instead of DatetimeArray
302-
values = extract_array(values, extract_numpy=True)
303-
return new_block(values, placement=mgr_locs, ndim=ndim)
304-
305-
if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
306-
state = state[3]["0.14.1"]
307-
self.axes = [ensure_index(ax) for ax in state["axes"]]
308-
ndim = len(self.axes)
309-
310-
for blk in state["blocks"]:
311-
vals = blk["values"]
312-
# older versions may hold e.g. DatetimeIndex instead of DTA
313-
vals = extract_array(vals, extract_numpy=True)
314-
blk["values"] = ensure_block_shape(vals, ndim=ndim)
315-
316-
self.blocks = tuple(
317-
unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
318-
for b in state["blocks"]
319-
)
320-
else:
321-
raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
322-
323-
self._post_setstate()
324-
325-
def _post_setstate(self) -> None:
326-
self._is_consolidated = False
327-
self._known_consolidated = False
328-
self._rebuild_blknos_and_blklocs()
329-
330273
def __repr__(self) -> str:
331274
output = type(self).__name__
332275
for i, ax in enumerate(self.axes):
@@ -823,7 +766,7 @@ def consolidate(self: T) -> T:
823766
if self.is_consolidated():
824767
return self
825768

826-
bm = type(self)(self.blocks, self.axes)
769+
bm = type(self)(self.blocks, self.axes, verify_integrity=False)
827770
bm._is_consolidated = False
828771
bm._consolidate_inplace()
829772
return bm
@@ -1079,7 +1022,7 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
10791022
)
10801023

10811024

1082-
class BlockManager(BaseBlockManager):
1025+
class BlockManager(libinternals.BlockManager, BaseBlockManager):
10831026
"""
10841027
BaseBlockManager that holds 2D blocks.
10851028
"""
@@ -1095,27 +1038,18 @@ def __init__(
10951038
axes: Sequence[Index],
10961039
verify_integrity: bool = True,
10971040
):
1098-
self.axes = [ensure_index(ax) for ax in axes]
1099-
self.blocks: tuple[Block, ...] = tuple(blocks)
1100-
1101-
for block in blocks:
1102-
if self.ndim != block.ndim:
1103-
raise AssertionError(
1104-
f"Number of Block dimensions ({block.ndim}) must equal "
1105-
f"number of axes ({self.ndim})"
1106-
)
11071041

11081042
if verify_integrity:
1109-
self._verify_integrity()
1043+
assert all(isinstance(x, Index) for x in axes)
11101044

1111-
# Populate known_consolidate, blknos, and blklocs lazily
1112-
self._known_consolidated = False
1113-
# error: Incompatible types in assignment (expression has type "None",
1114-
# variable has type "ndarray")
1115-
self._blknos = None # type: ignore[assignment]
1116-
# error: Incompatible types in assignment (expression has type "None",
1117-
# variable has type "ndarray")
1118-
self._blklocs = None # type: ignore[assignment]
1045+
for block in blocks:
1046+
if self.ndim != block.ndim:
1047+
raise AssertionError(
1048+
f"Number of Block dimensions ({block.ndim}) must equal "
1049+
f"number of axes ({self.ndim})"
1050+
)
1051+
1052+
self._verify_integrity()
11191053

11201054
def _verify_integrity(self) -> None:
11211055
mgr_shape = self.shape
@@ -1130,21 +1064,6 @@ def _verify_integrity(self) -> None:
11301064
f"tot_items: {tot_items}"
11311065
)
11321066

1133-
@classmethod
1134-
def _simple_new(cls, blocks: tuple[Block, ...], axes: list[Index]):
1135-
"""
1136-
Fastpath constructor; does NO validation.
1137-
"""
1138-
obj = cls.__new__(cls)
1139-
obj.axes = axes
1140-
obj.blocks = blocks
1141-
1142-
# Populate known_consolidate, blknos, and blklocs lazily
1143-
obj._known_consolidated = False
1144-
obj._blknos = None
1145-
obj._blklocs = None
1146-
return obj
1147-
11481067
@classmethod
11491068
def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager:
11501069
"""
@@ -1210,7 +1129,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
12101129
new_axes = list(self.axes)
12111130
new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
12121131

1213-
return type(self)._simple_new(tuple(new_blocks), new_axes)
1132+
return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)
12141133

12151134
def iget(self, i: int) -> SingleBlockManager:
12161135
"""
@@ -1418,7 +1337,7 @@ def idelete(self, indexer) -> BlockManager:
14181337
nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
14191338
new_columns = self.items[~is_deleted]
14201339
axes = [new_columns, self.axes[1]]
1421-
return type(self)._simple_new(tuple(nbs), axes)
1340+
return type(self)(tuple(nbs), axes)
14221341

14231342
# ----------------------------------------------------------------
14241343
# Block-wise Operation
@@ -1602,6 +1521,45 @@ def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
16021521
block = new_block(array, placement=slice(0, len(index)), ndim=1)
16031522
return cls(block, index)
16041523

1524+
def __getstate__(self):
1525+
block_values = [b.values for b in self.blocks]
1526+
block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
1527+
axes_array = list(self.axes)
1528+
1529+
extra_state = {
1530+
"0.14.1": {
1531+
"axes": axes_array,
1532+
"blocks": [
1533+
{"values": b.values, "mgr_locs": b.mgr_locs.indexer}
1534+
for b in self.blocks
1535+
],
1536+
}
1537+
}
1538+
1539+
# First three elements of the state are to maintain forward
1540+
# compatibility with 0.13.1.
1541+
return axes_array, block_values, block_items, extra_state
1542+
1543+
def __setstate__(self, state):
1544+
def unpickle_block(values, mgr_locs, ndim: int) -> Block:
1545+
# TODO(EA2D): ndim would be unnecessary with 2D EAs
1546+
# older pickles may store e.g. DatetimeIndex instead of DatetimeArray
1547+
values = extract_array(values, extract_numpy=True)
1548+
return new_block(values, placement=mgr_locs, ndim=ndim)
1549+
1550+
if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
1551+
state = state[3]["0.14.1"]
1552+
self.axes = [ensure_index(ax) for ax in state["axes"]]
1553+
ndim = len(self.axes)
1554+
self.blocks = tuple(
1555+
unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
1556+
for b in state["blocks"]
1557+
)
1558+
else:
1559+
raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
1560+
1561+
self._post_setstate()
1562+
16051563
def _post_setstate(self):
16061564
pass
16071565

0 commit comments

Comments
 (0)