From 7b41586f0914be110d7b5670b5f6bf35925de4be Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 16 Jan 2021 20:01:40 -0800 Subject: [PATCH 1/5] REF: Merge ComplexBlock into NumericBlock --- pandas/core/internals/__init__.py | 4 ++-- pandas/core/internals/blocks.py | 18 ++++++------------ pandas/core/internals/managers.py | 4 ++-- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index e71143224556b..4efa36ac76110 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -4,12 +4,12 @@ Block, BoolBlock, CategoricalBlock, - ComplexBlock, DatetimeBlock, DatetimeTZBlock, ExtensionBlock, FloatBlock, IntBlock, + NumericBlock, ObjectBlock, TimeDeltaBlock, make_block, @@ -27,7 +27,7 @@ "Block", "BoolBlock", "CategoricalBlock", - "ComplexBlock", + "NumericBlock", "DatetimeBlock", "DatetimeTZBlock", "ExtensionBlock", diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1356b9d3b2ca3..464182f47440c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -101,7 +101,6 @@ class Block(PandasObject): is_numeric = False is_float = False is_integer = False - is_complex = False is_datetime = False is_datetimetz = False is_timedelta = False @@ -1956,14 +1955,15 @@ def to_native_types( return self.make_block(res) -class ComplexBlock(NumericBlock): +class IntBlock(NumericBlock): __slots__ = () - is_complex = True + is_integer = True + _can_hold_na = False -class IntBlock(NumericBlock): +class BoolBlock(NumericBlock): __slots__ = () - is_integer = True + is_bool = True _can_hold_na = False @@ -2269,12 +2269,6 @@ def fillna(self, value, **kwargs): return super().fillna(value, **kwargs) -class BoolBlock(NumericBlock): - __slots__ = () - is_bool = True - _can_hold_na = False - - class ObjectBlock(Block): __slots__ = () is_object = True @@ -2478,7 +2472,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): 
elif kind == "f": cls = FloatBlock elif kind == "c": - cls = ComplexBlock + cls = NumericBlock elif kind == "i" or kind == "u": cls = IntBlock elif kind == "b": diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ad9cdcfa1b07f..cf165bb492939 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1732,8 +1732,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]: float_blocks = _multi_blockify(items_dict["FloatBlock"]) blocks.extend(float_blocks) - if len(items_dict["ComplexBlock"]): - complex_blocks = _multi_blockify(items_dict["ComplexBlock"]) + if len(items_dict["NumericBlock"]): + complex_blocks = _multi_blockify(items_dict["NumericBlock"]) blocks.extend(complex_blocks) if len(items_dict["TimeDeltaBlock"]): From a8c54dec8dc594a428e8e1c4ce3ea6fa2f3e732b Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 10:39:12 -0800 Subject: [PATCH 2/5] REF: merge IntBlock into NumericBlock --- pandas/core/internals/__init__.py | 2 -- pandas/core/internals/blocks.py | 28 ++++++++++------------ pandas/core/internals/managers.py | 4 ---- pandas/tests/frame/test_block_internals.py | 5 ++-- pandas/tests/internals/test_internals.py | 6 ++--- pandas/tests/reshape/concat/test_concat.py | 4 ++-- pandas/tests/series/test_constructors.py | 4 ++-- 7 files changed, 22 insertions(+), 31 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 4efa36ac76110..bae0a3b09ef77 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -8,7 +8,6 @@ DatetimeTZBlock, ExtensionBlock, FloatBlock, - IntBlock, NumericBlock, ObjectBlock, TimeDeltaBlock, @@ -32,7 +31,6 @@ "DatetimeTZBlock", "ExtensionBlock", "FloatBlock", - "IntBlock", "ObjectBlock", "TimeDeltaBlock", "safe_reshape", diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 464182f47440c..8db45dd076963 100644 --- a/pandas/core/internals/blocks.py 
+++ b/pandas/core/internals/blocks.py @@ -100,7 +100,6 @@ class Block(PandasObject): __slots__ = ["_mgr_locs", "values", "ndim"] is_numeric = False is_float = False - is_integer = False is_datetime = False is_datetimetz = False is_timedelta = False @@ -1194,7 +1193,7 @@ def _interpolate( # only deal with floats if not self.is_float: - if not self.is_integer: + if self.dtype.kind not in ["i", "u"]: return [self] data = data.astype(np.float64) @@ -1315,7 +1314,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List["Block"]: # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) if ( - (self.is_integer or self.is_bool) + (self.dtype.kind in ["b", "i", "u"]) and lib.is_float(other) and np.isnan(other) ): @@ -1331,7 +1330,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List["Block"]: return self._maybe_downcast(blocks, "infer") if not ( - (self.is_integer or self.is_bool) + (self.dtype.kind in ["b", "i", "u"]) and lib.is_float(other) and np.isnan(other) ): @@ -1911,11 +1910,18 @@ def external_values(self): class NumericBlock(Block): __slots__ = () is_numeric = True - _can_hold_na = True def _can_hold_element(self, element: Any) -> bool: return can_hold_element(self.dtype, element) + @property + def _can_hold_na(self) -> bool: + return self.dtype.kind not in ["b", "i", "u"] + + @property + def is_bool(self) -> bool: + return self.dtype.kind == "b" + class FloatBlock(NumericBlock): __slots__ = () @@ -1955,16 +1961,8 @@ def to_native_types( return self.make_block(res) -class IntBlock(NumericBlock): - __slots__ = () - is_integer = True - _can_hold_na = False - - class BoolBlock(NumericBlock): __slots__ = () - is_bool = True - _can_hold_na = False class DatetimeLikeBlockMixin(HybridMixin, Block): @@ -2232,7 +2230,7 @@ def _check_ndim(self, values, ndim): return ndim -class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): +class TimeDeltaBlock(DatetimeLikeBlockMixin): __slots__ = () 
is_timedelta = True _can_hold_na = True @@ -2474,7 +2472,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): elif kind == "c": cls = NumericBlock elif kind == "i" or kind == "u": - cls = IntBlock + cls = NumericBlock elif kind == "b": cls = BoolBlock else: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cf165bb492939..5c8d97d05db56 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1740,10 +1740,6 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]: timedelta_blocks = _multi_blockify(items_dict["TimeDeltaBlock"]) blocks.extend(timedelta_blocks) - if len(items_dict["IntBlock"]): - int_blocks = _multi_blockify(items_dict["IntBlock"]) - blocks.extend(int_blocks) - if len(items_dict["DatetimeBlock"]): datetime_blocks = _simple_blockify(items_dict["DatetimeBlock"], DT64NS_DTYPE) blocks.extend(datetime_blocks) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8954d8a0e7598..b36f6fcf8b9f8 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -18,8 +18,7 @@ option_context, ) import pandas._testing as tm -from pandas.core.internals import ObjectBlock -from pandas.core.internals.blocks import IntBlock +from pandas.core.internals import NumericBlock, ObjectBlock # Segregated collection of methods that require the BlockManager internal data # structure @@ -352,7 +351,7 @@ def test_constructor_no_pandas_array(self): result = DataFrame({"A": arr}) expected = DataFrame({"A": [1, 2, 3]}) tm.assert_frame_equal(result, expected) - assert isinstance(result._mgr.blocks[0], IntBlock) + assert isinstance(result._mgr.blocks[0], NumericBlock) def test_add_column_with_pandas_array(self): # GH 26390 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index f9ba05f7092d4..0a50ef2831534 100644 --- 
a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1295,17 +1295,17 @@ def test_make_block_no_pandas_array(): # PandasArray, no dtype result = make_block(arr, slice(len(arr)), ndim=arr.ndim) - assert result.is_integer is True + assert result.dtype.kind in ["i", "u"] assert result.is_extension is False # PandasArray, PandasDtype result = make_block(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) - assert result.is_integer is True + assert result.dtype.kind in ["i", "u"] assert result.is_extension is False # ndarray, PandasDtype result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) - assert result.is_integer is True + assert result.dtype.kind in ["i", "u"] assert result.is_extension is False diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 16c4e9456aa05..575903de8f946 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -31,7 +31,7 @@ def test_concat_copy(self): for b in result._mgr.blocks: if b.is_float: assert b.values.base is df._mgr.blocks[0].values.base - elif b.is_integer: + elif b.dtype.kind in ["i", "u"]: assert b.values.base is df2._mgr.blocks[0].values.base elif b.is_object: assert b.values.base is not None @@ -42,7 +42,7 @@ def test_concat_copy(self): for b in result._mgr.blocks: if b.is_float: assert b.values.base is None - elif b.is_integer: + elif b.dtype.kind in ["i", "u"]: assert b.values.base is df2._mgr.blocks[0].values.base elif b.is_object: assert b.values.base is not None diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 9350a3becb3d9..0565ca7a3476a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -32,7 +32,7 @@ ) import pandas._testing as tm from pandas.core.arrays import IntervalArray, period_array -from pandas.core.internals.blocks import IntBlock 
+from pandas.core.internals.blocks import NumericBlock class TestSeriesConstructors: @@ -1649,7 +1649,7 @@ def test_constructor_no_pandas_array(self): ser = Series([1, 2, 3]) result = Series(ser.array) tm.assert_series_equal(ser, result) - assert isinstance(result._mgr.blocks[0], IntBlock) + assert isinstance(result._mgr.blocks[0], NumericBlock) def test_from_array(self): result = Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) From 63794fe99840510364f61fa959a58fac34d27b39 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 11:01:45 -0800 Subject: [PATCH 3/5] REF: Roll BoolBlock into NumericBlock --- pandas/core/internals/__init__.py | 2 -- pandas/core/internals/blocks.py | 6 +----- pandas/core/internals/managers.py | 4 ---- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index bae0a3b09ef77..ff4e186e147d7 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -2,7 +2,6 @@ from pandas.core.internals.base import DataManager from pandas.core.internals.blocks import ( # io.pytables, io.packers Block, - BoolBlock, CategoricalBlock, DatetimeBlock, DatetimeTZBlock, @@ -24,7 +23,6 @@ __all__ = [ "Block", - "BoolBlock", "CategoricalBlock", "NumericBlock", "DatetimeBlock", diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8db45dd076963..dd5d2f46ff44e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1961,10 +1961,6 @@ def to_native_types( return self.make_block(res) -class BoolBlock(NumericBlock): - __slots__ = () - - class DatetimeLikeBlockMixin(HybridMixin, Block): """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" @@ -2474,7 +2470,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): elif kind == "i" or kind == "u": cls = NumericBlock elif kind == "b": - cls = BoolBlock + cls = NumericBlock else: cls = ObjectBlock return cls diff --git 
a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5c8d97d05db56..02856d4211d92 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1751,10 +1751,6 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]: ] blocks.extend(dttz_blocks) - if len(items_dict["BoolBlock"]): - bool_blocks = _simple_blockify(items_dict["BoolBlock"], np.bool_) - blocks.extend(bool_blocks) - if len(items_dict["ObjectBlock"]) > 0: object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) blocks.extend(object_blocks) From 4ac5f802b2377bf57c9b846aa9671514146ba2ee Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 11:02:12 -0800 Subject: [PATCH 4/5] simplify --- pandas/core/internals/blocks.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dd5d2f46ff44e..8bcf9fface60f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2465,11 +2465,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): cls = TimeDeltaBlock elif kind == "f": cls = FloatBlock - elif kind == "c": - cls = NumericBlock - elif kind == "i" or kind == "u": - cls = NumericBlock - elif kind == "b": + elif kind in ["c", "i", "u", "b"]: cls = NumericBlock else: cls = ObjectBlock From efe061cd982b91043748f530bb4228bf10f239bd Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 19:56:29 -0800 Subject: [PATCH 5/5] mypy fixup --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8bcf9fface60f..a96f066bc248b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1915,11 +1915,11 @@ def _can_hold_element(self, element: Any) -> bool: return can_hold_element(self.dtype, element) @property - def _can_hold_na(self) -> bool: + def _can_hold_na(self): return 
self.dtype.kind not in ["b", "i", "u"] @property - def is_bool(self) -> bool: + def is_bool(self): return self.dtype.kind == "b"