Skip to content

Commit 57da161

Browse files
authored
REF: implement libinternals.NumpyBlock (#40757)
1 parent c8ad7fd commit 57da161

File tree

3 files changed

+63
-22
lines changed

3 files changed

+63
-22
lines changed

pandas/_libs/internals.pyi

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ from typing import (
66

77
import numpy as np
88

9-
from pandas._typing import ArrayLike
9+
from pandas._typing import (
10+
ArrayLike,
11+
T,
12+
)
1013

1114
def slice_len(slc: slice, objlen: int = ...) -> int: ...
1215

@@ -50,9 +53,16 @@ class BlockPlacement:
5053
def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
5154

5255

53-
class Block:
56+
class SharedBlock:
5457
_mgr_locs: BlockPlacement
5558
ndim: int
5659
values: ArrayLike
5760

5861
def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ...
62+
63+
class NumpyBlock(SharedBlock):
64+
values: np.ndarray
65+
def getitem_block_index(self: T, slicer: slice) -> T: ...
66+
67+
class Block(SharedBlock):
68+
...

pandas/_libs/internals.pyx

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,14 +458,13 @@ def get_blkno_placements(blknos, group: bool = True):
458458

459459

460460
@cython.freelist(64)
461-
cdef class Block:
461+
cdef class SharedBlock:
462462
"""
463463
Defining __init__ in a cython class significantly improves performance.
464464
"""
465465
cdef:
466466
public BlockPlacement _mgr_locs
467467
readonly int ndim
468-
public object values
469468

470469
def __cinit__(self, values, placement: BlockPlacement, ndim: int):
471470
"""
@@ -479,7 +478,6 @@ cdef class Block:
479478
"""
480479
self._mgr_locs = placement
481480
self.ndim = ndim
482-
self.values = values
483481

484482
cpdef __reduce__(self):
485483
# We have to do some gymnastics b/c "ndim" is keyword-only
@@ -505,3 +503,33 @@ cdef class Block:
505503

506504
ndim = maybe_infer_ndim(self.values, self.mgr_locs)
507505
self.ndim = ndim
506+
507+
508+
cdef class NumpyBlock(SharedBlock):
509+
cdef:
510+
public ndarray values
511+
512+
def __cinit__(self, ndarray values, BlockPlacement placement, int ndim):
513+
# set values here; the (implicit) call to SharedBlock.__cinit__ will
514+
# set placement and ndim
515+
self.values = values
516+
517+
# @final # not useful in cython, but we _would_ annotate with @final
518+
def getitem_block_index(self, slicer: slice) -> NumpyBlock:
519+
"""
520+
Perform __getitem__-like specialized to slicing along index.
521+
522+
Assumes self.ndim == 2
523+
"""
524+
new_values = self.values[..., slicer]
525+
return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
526+
527+
528+
cdef class Block(SharedBlock):
529+
cdef:
530+
public object values
531+
532+
def __cinit__(self, object values, BlockPlacement placement, int ndim):
533+
# set values here; the (implicit) call to SharedBlock.__cinit__ will
534+
# set placement and ndim
535+
self.values = values

pandas/core/internals/blocks.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import (
66
TYPE_CHECKING,
77
Any,
8-
Union,
8+
Callable,
99
cast,
1010
)
1111
import warnings
@@ -138,7 +138,7 @@ def newfunc(self, *args, **kwargs) -> list[Block]:
138138
return cast(F, newfunc)
139139

140140

141-
class Block(libinternals.Block, PandasObject):
141+
class Block(PandasObject):
142142
"""
143143
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
144144
data structure
@@ -147,6 +147,8 @@ class Block(libinternals.Block, PandasObject):
147147
"""
148148

149149
values: np.ndarray | ExtensionArray
150+
ndim: int
151+
__init__: Callable
150152

151153
__slots__ = ()
152154
is_numeric = False
@@ -313,7 +315,6 @@ def getitem_block(self, slicer) -> Block:
313315

314316
return type(self)(new_values, new_mgr_locs, self.ndim)
315317

316-
@final
317318
def getitem_block_index(self, slicer: slice) -> Block:
318319
"""
319320
Perform __getitem__-like specialized to slicing along index.
@@ -1371,7 +1372,7 @@ def interpolate(
13711372
return self.make_block_same_class(new_values)
13721373

13731374

1374-
class ExtensionBlock(EABackedBlock):
1375+
class ExtensionBlock(libinternals.Block, EABackedBlock):
13751376
"""
13761377
Block for holding extension types.
13771378
@@ -1660,7 +1661,13 @@ def _unstack(self, unstacker, fill_value, new_placement):
16601661
return blocks, mask
16611662

16621663

1663-
class NumericBlock(Block):
1664+
class NumpyBlock(libinternals.NumpyBlock, Block):
1665+
values: np.ndarray
1666+
1667+
getitem_block_index = libinternals.NumpyBlock.getitem_block_index
1668+
1669+
1670+
class NumericBlock(NumpyBlock):
16641671
__slots__ = ()
16651672
is_numeric = True
16661673

@@ -1771,16 +1778,15 @@ def fillna(
17711778
return [self.make_block_same_class(values=new_values)]
17721779

17731780

1774-
class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
1775-
"""Mixin class for DatetimeLikeBlock, DatetimeTZBlock."""
1781+
class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock):
1782+
"""Block for datetime64[ns], timedelta64[ns]."""
17761783

17771784
__slots__ = ()
17781785
is_numeric = False
1779-
17801786
values: DatetimeArray | TimedeltaArray
17811787

17821788

1783-
class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock):
1789+
class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock):
17841790
""" implement a datetime64 block with a tz attribute """
17851791

17861792
values: DatetimeArray
@@ -1794,18 +1800,15 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock):
17941800
putmask = NDArrayBackedExtensionBlock.putmask
17951801
fillna = NDArrayBackedExtensionBlock.fillna
17961802

1797-
# error: Incompatible types in assignment (expression has type
1798-
# "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock"
1799-
# defined the type as "bool") [assignment]
1800-
is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment]
1803+
get_values = NDArrayBackedExtensionBlock.get_values
1804+
1805+
is_view = NDArrayBackedExtensionBlock.is_view
18011806

18021807

1803-
class ObjectBlock(Block):
1808+
class ObjectBlock(NumpyBlock):
18041809
__slots__ = ()
18051810
is_object = True
18061811

1807-
values: np.ndarray
1808-
18091812
@maybe_split
18101813
def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
18111814
"""
@@ -2030,7 +2033,7 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
20302033
# TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
20312034
# block.shape is incorrect for "2D" ExtensionArrays
20322035
# We can't, and don't need to, reshape.
2033-
values = cast(Union[np.ndarray, DatetimeArray, TimedeltaArray], values)
2036+
values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
20342037
values = values.reshape(1, -1)
20352038

20362039
return values

0 commit comments

Comments
 (0)