diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 82cbf1397031d..28f1102ab738e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -96,6 +96,45 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first` combined.dtypes +.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting: + +Consistent Casting With Setting Into Boolean Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Setting non-boolean values into a :class:`Series` with ``dtype=bool`` now consistently +casts to ``dtype=object`` (:issue:`38709`) + +.. ipython:: python + + orig = pd.Series([True, False]) + ser = orig.copy() + ser.iloc[1] = np.nan + ser2 = orig.copy() + ser2.iloc[1] = 2.0 + +*pandas 1.2.x* + +.. code-block:: ipython + + In [1]: ser + Out [1]: + 0 1.0 + 1 NaN + dtype: float64 + + In [2]: ser2 + Out [2]: + 0 True + 1 2.0 + dtype: object + +*pandas 1.3.0* + +.. ipython:: python + + ser + ser2 + .. _whatsnew_130.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 2a1b6f784a1f2..ca83692ad7ca4 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -9,7 +9,7 @@ from pandas._libs import lib from pandas._typing import ArrayLike -from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, maybe_promote +from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, find_common_type from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like from pandas.core.dtypes.missing import isna_compat @@ -106,9 +106,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray: # preserves dtype if possible return _putmask_preserve(values, new, mask) - # change the dtype if needed - dtype, _ = maybe_promote(new.dtype) - + dtype = find_common_type([values.dtype, new.dtype]) values = values.astype(dtype) return _putmask_preserve(values, new, mask) 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0ff3637669388..fe5256ca42982 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -27,7 +27,6 @@ convert_scalar_for_putitemlike, find_common_type, infer_dtype_from, - infer_dtype_from_scalar, maybe_downcast_numeric, maybe_downcast_to_dtype, maybe_promote, @@ -904,24 +903,7 @@ def setitem(self, indexer, value): values = self.values if not self._can_hold_element(value): # current dtype cannot store value, coerce to common dtype - # TODO: can we just use coerce_to_target_dtype for all this - if hasattr(value, "dtype"): - dtype = value.dtype - - elif lib.is_scalar(value) and not isna(value): - dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) - - else: - # e.g. we are bool dtype and value is nan - # TODO: watch out for case with listlike value and scalar/empty indexer - dtype, _ = maybe_promote(np.array(value).dtype) - return self.astype(dtype).setitem(indexer, value) - - dtype = find_common_type([values.dtype, dtype]) - assert not is_dtype_equal(self.dtype, dtype) - # otherwise should have _can_hold_element - - return self.astype(dtype).setitem(indexer, value) + return self.coerce_to_target_dtype(value).setitem(indexer, value) if self.dtype.kind in ["m", "M"]: arr = self.array_values().T @@ -1310,29 +1292,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: else: # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) - if ( - (self.dtype.kind in ["b", "i", "u"]) - and lib.is_float(other) - and np.isnan(other) - ): - # GH#3733 special case to avoid object-dtype casting - # and go through numexpr path instead. 
- # In integer case, np.where will cast to floats - pass - elif not self._can_hold_element(other): + if not self._can_hold_element(other): # we cannot coerce, return a compat dtype # we are explicitly ignoring errors block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond, errors=errors, axis=axis) return self._maybe_downcast(blocks, "infer") - if not ( - (self.dtype.kind in ["b", "i", "u"]) - and lib.is_float(other) - and np.isnan(other) - ): - # convert datetime to datetime64, timedelta to timedelta64 - other = convert_scalar_for_putitemlike(other, values.dtype) + # convert datetime to datetime64, timedelta to timedelta64 + other = convert_scalar_for_putitemlike(other, values.dtype) # By the time we get here, we should have all Series/Index # args extracted to ndarray diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 8050769f56f6c..bd541719c0877 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -74,13 +74,13 @@ def test_mask_callable(self): tm.assert_frame_equal(result, exp) tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10)) - def test_mask_dtype_conversion(self): + def test_mask_dtype_bool_conversion(self): # GH#3733 df = DataFrame(data=np.random.randn(100, 50)) df = df.where(df > 0) # create nans bools = df > 0 mask = isna(df) - expected = bools.astype(float).mask(mask) + expected = bools.astype(object).mask(mask) result = bools.mask(mask) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7f469f361fec7..a49fafe8e90bd 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -242,41 +242,47 @@ def test_setitem_callable_other(self): @pytest.mark.parametrize( "obj,expected,key", [ - ( + pytest.param( # these induce dtype changes Series([2, 3, 4, 5, 6, 7, 8, 9, 
10]), Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), slice(None, None, 2), + id="int_series_slice_key_step", ), - ( - # gets coerced to float, right? + pytest.param( Series([True, True, False, False]), - Series([np.nan, 1, np.nan, 0]), + Series([np.nan, True, np.nan, False], dtype=object), slice(None, None, 2), + id="bool_series_slice_key_step", ), - ( + pytest.param( # these induce dtype changes Series(np.arange(10)), Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), slice(None, 5), + id="int_series_slice_key", ), - ( + pytest.param( # changes dtype GH#4463 Series([1, 2, 3]), Series([np.nan, 2, 3]), 0, + id="int_series_int_key", ), - ( + pytest.param( # changes dtype GH#4463 Series([False]), - Series([np.nan]), + Series([np.nan], dtype=object), + # TODO: maybe go to float64 since we are changing the _whole_ Series? 0, + id="bool_series_int_key_change_all", ), - ( + pytest.param( # changes dtype GH#4463 Series([False, True]), - Series([np.nan, 1.0]), + Series([np.nan, True], dtype=object), 0, + id="bool_series_int_key", ), ], ) @@ -289,45 +295,56 @@ class TestSetitemCastingEquivalents: - the setitem does not expand the obj """ - def test_int_key(self, obj, key, expected, indexer_sli): + @pytest.fixture(params=[np.nan, np.float64("NaN")]) + def val(self, request): + """ + One python float NaN, one np.float64. Only np.float64 has a `dtype` + attribute. 
+ """ + return request.param + + def test_int_key(self, obj, key, expected, val, indexer_sli): if not isinstance(key, int): return obj = obj.copy() - indexer_sli(obj)[key] = np.nan + indexer_sli(obj)[key] = val tm.assert_series_equal(obj, expected) - def test_slice_key(self, obj, key, expected, indexer_si): + def test_slice_key(self, obj, key, expected, val, indexer_si): # Note: no .loc because that handles slice edges differently obj = obj.copy() - indexer_si(obj)[key] = np.nan + indexer_si(obj)[key] = val tm.assert_series_equal(obj, expected) - def test_intlist_key(self, obj, key, expected, indexer_sli): + def test_intlist_key(self, obj, key, expected, val, indexer_sli): ilkey = list(range(len(obj)))[key] obj = obj.copy() - indexer_sli(obj)[ilkey] = np.nan + indexer_sli(obj)[ilkey] = val tm.assert_series_equal(obj, expected) - def test_mask_key(self, obj, key, expected, indexer_sli): + def test_mask_key(self, obj, key, expected, val, indexer_sli): # setitem with boolean mask mask = np.zeros(obj.shape, dtype=bool) mask[key] = True obj = obj.copy() - indexer_sli(obj)[mask] = np.nan + indexer_sli(obj)[mask] = val tm.assert_series_equal(obj, expected) - def test_series_where(self, obj, key, expected): + def test_series_where(self, obj, key, expected, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True obj = obj.copy() - res = obj.where(~mask, np.nan) + res = obj.where(~mask, val) tm.assert_series_equal(res, expected) - def test_index_where(self, obj, key, expected, request): + def test_index_where(self, obj, key, expected, val, request): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -337,15 +354,18 @@ def test_index_where(self, obj, key, expected, request): mark = pytest.mark.xfail(reason=msg) request.node.add_marker(mark) - res = Index(obj).where(~mask, np.nan) + res = Index(obj).where(~mask, val) tm.assert_index_equal(res, Index(expected)) 
@pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692") - def test_index_putmask(self, obj, key, expected): + def test_index_putmask(self, obj, key, expected, val): + if Index(obj).dtype != obj.dtype: + pytest.skip("test not applicable for this dtype") + mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - res = Index(obj).putmask(mask, np.nan) + res = Index(obj).putmask(mask, val) tm.assert_index_equal(res, Index(expected))