From 36b4ba21c567567fd88cc6fe22d242d52c10b6c0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 21 Jan 2020 13:48:44 -0600 Subject: [PATCH 1/7] BUG: Series/Frame invert dtypes Fixes NDFrame.__invert__ to preserve dtypes. Notably * Dispatches to extension arrays * Applies blockwise to avoid upcasting to a common dtype --- doc/source/whatsnew/v1.0.0.rst | 2 ++ pandas/core/arrays/masked.py | 3 +++ pandas/core/generic.py | 5 +++-- pandas/core/internals/blocks.py | 4 ++++ pandas/core/internals/managers.py | 7 +++++++ pandas/tests/arrays/test_boolean.py | 18 ++++++++++++++++++ pandas/tests/extension/base/__init__.py | 7 ++++++- pandas/tests/extension/base/ops.py | 8 ++++++++ pandas/tests/extension/test_boolean.py | 4 ++++ pandas/tests/frame/test_operators.py | 21 +++++++++++++++++++++ 10 files changed, 76 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ec6ad38bbc7cf..aa13a50068271 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1018,6 +1018,7 @@ Numeric - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) - Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) +- Bug in dtypes being lost in ``DataFrame.__invert__`` with mixed dtypes (:issue:``) Conversion ^^^^^^^^^^ @@ -1166,6 +1167,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). 
- Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) +- Bug in dtype being lost in ``__invert__`` for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) Other diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 47605413ff1a6..5eaed70721592 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -50,6 +50,9 @@ def __iter__(self): def __len__(self) -> int: return len(self._data) + def __invert__(self): + return type(self)(~self._data, self._mask) + def to_numpy( self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default, ): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6c04212e26924..b89fa148d66e9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1393,8 +1393,9 @@ def __invert__(self): # inv fails with 0 len return self - arr = operator.inv(com.values_from_object(self)) - return self.__array_wrap__(arr) + new_data = ~self._data + result = self._constructor(new_data).__finalize__(self) + return result def __nonzero__(self): raise ValueError( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cb702a81d2bde..33fcc662ee07b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -294,6 +294,10 @@ def __setstate__(self, state): self.values = state[1] self.ndim = self.values.ndim + def __invert__(self): + new_values = ~self.values + return make_block(new_values, self.mgr_locs, ndim=self.ndim) + def _slice(self, slicer): """ return a slice of my values """ return self.values[slicer] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 847f543ebca4d..a4c80cf585b96 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -528,6 +528,10 @@ def get_axe(block, qs, axes): 
[make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0] ) + def __invert__(self): + new_blocks = [~blk for blk in self.blocks] + return type(self)(new_blocks, axes=self.axes) + def isna(self, func): return self.apply("apply", func=func) @@ -1603,6 +1607,9 @@ def concat(self, to_concat, new_axis): mgr = SingleBlockManager(new_block, new_axis) return mgr + def __invert__(self): + return type(self)(~self._block, self.axes) + # -------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index cc8d0cdcb518d..f8464acf7153c 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -455,6 +455,24 @@ def test_ufunc_reduce_raises(values): np.add.reduce(a) +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + class TestLogicalOps(BaseOpsUtil): def test_numpy_scalars_ok(self, all_logical_operators): a = pd.array([True, False, None], dtype="boolean") diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 090df35bd94c9..e2b6ea0304f6a 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -49,7 +49,12 @@ class TestMyDtype(BaseDtypeTests): from .io import BaseParsingTests # noqa from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # 
noqa -from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa +from .ops import ( # noqa + BaseArithmeticOpsTests, + BaseComparisonOpsTests, + BaseOpsUtil, + BaseUnaryOpsTests, +) from .printing import BasePrintingTests # noqa from .reduce import ( # noqa BaseBooleanReduceTests, diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 20d06ef2e5647..0609f19c8e0c3 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -168,3 +168,11 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): assert result is NotImplemented else: raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + + +class BaseUnaryOpsTests(BaseOpsUtil): + def test_invert(self, data): + s = pd.Series(data, name="name") + result = ~s + expected = pd.Series(~data, name="name") + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index c489445d8512a..0c6b187eac1fc 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -342,6 +342,10 @@ class TestPrinting(base.BasePrintingTests): pass +class TestUnaryOps(base.BaseUnaryOpsTests): + pass + + # TODO parsing not yet supported # class TestParsing(base.BaseParsingTests): # pass diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index c727cb398d53e..55f1216a0efd7 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -61,6 +61,27 @@ def test_invert(self, float_frame): tm.assert_frame_equal(-(df < 0), ~(df < 0)) + def test_invert_mixed(self): + shape = (10, 5) + df = pd.concat( + [ + pd.DataFrame(np.zeros(shape, dtype="bool")), + pd.DataFrame(np.zeros(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + result = ~df + expected = pd.concat( + [ + pd.DataFrame(np.ones(shape, dtype="bool")), + 
pd.DataFrame(-np.ones(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "df", [ From 8b979d72f0e23a15372de8f41c23bbd1ca0e48f2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 21 Jan 2020 14:39:59 -0600 Subject: [PATCH 2/7] sparse --- pandas/core/indexing.py | 2 +- pandas/tests/arrays/sparse/test_arithmetics.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 63a86792082da..e93fe203e3e9e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2231,7 +2231,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: result = result.astype(bool)._values else: if is_sparse(result): - result = result.to_dense() + result = np.asarray(result) result = check_bool_array_indexer(index, result) return result diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 76442a63ccb0f..01431d3709449 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -468,6 +468,14 @@ def test_invert(fill_value): expected = SparseArray(~arr, fill_value=not fill_value) tm.assert_sp_array_equal(result, expected) + result = ~pd.Series(sparray) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = ~pd.DataFrame({"A": sparray}) + expected = pd.DataFrame({"A": expected}) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("fill_value", [0, np.nan]) @pytest.mark.parametrize("op", [operator.pos, operator.neg]) From d9a30d4d42d7e2c9db8c610864b9f32d0eb4f793 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 21 Jan 2020 15:22:30 -0600 Subject: [PATCH 3/7] issue --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 
aa13a50068271..f61dc912d4e2b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1018,7 +1018,7 @@ Numeric - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) - Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) -- Bug in dtypes being lost in ``DataFrame.__invert__`` with mixed dtypes (:issue:``) +- Bug in dtypes being lost in ``DataFrame.__invert__`` with mixed dtypes (:issue:`31183`) Conversion ^^^^^^^^^^ From f989a29d29d0daa3cec3570101499d0a8033880e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 22 Jan 2020 10:36:56 -0600 Subject: [PATCH 4/7] use apply --- pandas/core/generic.py | 2 +- pandas/core/internals/managers.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b89fa148d66e9..cdaf12392dc66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1393,7 +1393,7 @@ def __invert__(self): # inv fails with 0 len return self - new_data = ~self._data + new_data = self._data.apply(operator.invert) result = self._constructor(new_data).__finalize__(self) return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a4c80cf585b96..847f543ebca4d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -528,10 +528,6 @@ def get_axe(block, qs, axes): [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0] ) - def __invert__(self): - new_blocks = [~blk for blk in self.blocks] - return type(self)(new_blocks, axes=self.axes) - def isna(self, func): return self.apply("apply", func=func) @@ -1607,9 +1603,6 @@ def concat(self, to_concat, 
new_axis): mgr = SingleBlockManager(new_block, new_axis) return mgr - def __invert__(self): - return type(self)(~self._block, self.axes) - # -------------------------------------------------------------------- # Constructor Helpers From 04dba41c3821068c32ba511e421059e70ee786a2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 23 Jan 2020 06:31:15 -0600 Subject: [PATCH 5/7] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f61dc912d4e2b..9fd7fec787b97 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1018,7 +1018,7 @@ Numeric - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) - Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. 
cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) -- Bug in dtypes being lost in ``DataFrame.__invert__`` with mixed dtypes (:issue:`31183`) +- Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) Conversion ^^^^^^^^^^ From 5860808e257c082a00843cc553439dbaac6a1421 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 23 Jan 2020 06:31:22 -0600 Subject: [PATCH 6/7] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9fd7fec787b97..a50d7968b6f5a 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1167,7 +1167,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). - Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) -- Bug in dtype being lost in ``__invert__`` for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) +- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) Other From fc81f81315d8919334e3a4bf7b026a793f8b091f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 23 Jan 2020 10:05:14 -0600 Subject: [PATCH 7/7] remove Block.invert --- pandas/core/internals/blocks.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cd6beef650f60..58ee72ace4d37 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -294,10 +294,6 @@ def __setstate__(self, state): self.values = state[1] self.ndim = self.values.ndim - def __invert__(self): - new_values = ~self.values - 
return make_block(new_values, self.mgr_locs, ndim=self.ndim) - def _slice(self, slicer): """ return a slice of my values """ return self.values[slicer]