Skip to content

Commit 3217b43

Browse files
authored
API: Series[bool][key] = np.nan -> cast to object (#38709)
1 parent 6e579ed commit 3217b43

File tree

5 files changed

+91
-66
lines changed

5 files changed

+91
-66
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,45 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first`
9797
combined.dtypes
9898
9999
100+
.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:
101+
102+
Consistent Casting With Setting Into Boolean Series
103+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
104+
105+
Setting non-boolean values into a :class:`Series` with ``dtype=bool`` consistently
106+
casts to ``dtype=object`` (:issue:`38709`)
107+
108+
.. ipython:: python
109+
110+
orig = pd.Series([True, False])
111+
ser = orig.copy()
112+
ser.iloc[1] = np.nan
113+
ser2 = orig.copy()
114+
ser2.iloc[1] = 2.0
115+
116+
*pandas 1.2.x*
117+
118+
.. code-block:: ipython
119+
120+
In [1]: ser
121+
Out[1]:
122+
0 1.0
123+
1 NaN
124+
dtype: float64
125+
126+
In [2]: ser2
127+
Out[2]:
128+
0 True
129+
1 2.0
130+
dtype: object
131+
132+
*pandas 1.3.0*
133+
134+
.. ipython:: python
135+
136+
ser
137+
ser2
138+
100139
.. _whatsnew_130.api_breaking.deps:
101140

102141
Increased minimum versions for dependencies

pandas/core/array_algos/putmask.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas._libs import lib
1010
from pandas._typing import ArrayLike
1111

12-
from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, maybe_promote
12+
from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, find_common_type
1313
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like
1414
from pandas.core.dtypes.missing import isna_compat
1515

@@ -106,9 +106,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
106106
# preserves dtype if possible
107107
return _putmask_preserve(values, new, mask)
108108

109-
# change the dtype if needed
110-
dtype, _ = maybe_promote(new.dtype)
111-
109+
dtype = find_common_type([values.dtype, new.dtype])
112110
values = values.astype(dtype)
113111

114112
return _putmask_preserve(values, new, mask)

pandas/core/internals/blocks.py

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
convert_scalar_for_putitemlike,
2828
find_common_type,
2929
infer_dtype_from,
30-
infer_dtype_from_scalar,
3130
maybe_downcast_numeric,
3231
maybe_downcast_to_dtype,
3332
maybe_promote,
@@ -904,24 +903,7 @@ def setitem(self, indexer, value):
904903
values = self.values
905904
if not self._can_hold_element(value):
906905
# current dtype cannot store value, coerce to common dtype
907-
# TODO: can we just use coerce_to_target_dtype for all this
908-
if hasattr(value, "dtype"):
909-
dtype = value.dtype
910-
911-
elif lib.is_scalar(value) and not isna(value):
912-
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
913-
914-
else:
915-
# e.g. we are bool dtype and value is nan
916-
# TODO: watch out for case with listlike value and scalar/empty indexer
917-
dtype, _ = maybe_promote(np.array(value).dtype)
918-
return self.astype(dtype).setitem(indexer, value)
919-
920-
dtype = find_common_type([values.dtype, dtype])
921-
assert not is_dtype_equal(self.dtype, dtype)
922-
# otherwise should have _can_hold_element
923-
924-
return self.astype(dtype).setitem(indexer, value)
906+
return self.coerce_to_target_dtype(value).setitem(indexer, value)
925907

926908
if self.dtype.kind in ["m", "M"]:
927909
arr = self.array_values().T
@@ -1310,29 +1292,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
13101292
else:
13111293
# see if we can operate on the entire block, or need item-by-item
13121294
# or if we are a single block (ndim == 1)
1313-
if (
1314-
(self.dtype.kind in ["b", "i", "u"])
1315-
and lib.is_float(other)
1316-
and np.isnan(other)
1317-
):
1318-
# GH#3733 special case to avoid object-dtype casting
1319-
# and go through numexpr path instead.
1320-
# In integer case, np.where will cast to floats
1321-
pass
1322-
elif not self._can_hold_element(other):
1295+
if not self._can_hold_element(other):
13231296
# we cannot coerce, return a compat dtype
13241297
# we are explicitly ignoring errors
13251298
block = self.coerce_to_target_dtype(other)
13261299
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
13271300
return self._maybe_downcast(blocks, "infer")
13281301

1329-
if not (
1330-
(self.dtype.kind in ["b", "i", "u"])
1331-
and lib.is_float(other)
1332-
and np.isnan(other)
1333-
):
1334-
# convert datetime to datetime64, timedelta to timedelta64
1335-
other = convert_scalar_for_putitemlike(other, values.dtype)
1302+
# convert datetime to datetime64, timedelta to timedelta64
1303+
other = convert_scalar_for_putitemlike(other, values.dtype)
13361304

13371305
# By the time we get here, we should have all Series/Index
13381306
# args extracted to ndarray

pandas/tests/frame/indexing/test_mask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ def test_mask_callable(self):
7474
tm.assert_frame_equal(result, exp)
7575
tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
7676

77-
def test_mask_dtype_conversion(self):
77+
def test_mask_dtype_bool_conversion(self):
7878
# GH#3733
7979
df = DataFrame(data=np.random.randn(100, 50))
8080
df = df.where(df > 0) # create nans
8181
bools = df > 0
8282
mask = isna(df)
83-
expected = bools.astype(float).mask(mask)
83+
expected = bools.astype(object).mask(mask)
8484
result = bools.mask(mask)
8585
tm.assert_frame_equal(result, expected)
8686

pandas/tests/series/indexing/test_setitem.py

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -260,41 +260,47 @@ def test_setitem_callable_other(self):
260260
@pytest.mark.parametrize(
261261
"obj,expected,key",
262262
[
263-
(
263+
pytest.param(
264264
# these induce dtype changes
265265
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
266266
Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]),
267267
slice(None, None, 2),
268+
id="int_series_slice_key_step",
268269
),
269-
(
270-
# gets coerced to float, right?
270+
pytest.param(
271271
Series([True, True, False, False]),
272-
Series([np.nan, 1, np.nan, 0]),
272+
Series([np.nan, True, np.nan, False], dtype=object),
273273
slice(None, None, 2),
274+
id="bool_series_slice_key_step",
274275
),
275-
(
276+
pytest.param(
276277
# these induce dtype changes
277278
Series(np.arange(10)),
278279
Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]),
279280
slice(None, 5),
281+
id="int_series_slice_key",
280282
),
281-
(
283+
pytest.param(
282284
# changes dtype GH#4463
283285
Series([1, 2, 3]),
284286
Series([np.nan, 2, 3]),
285287
0,
288+
id="int_series_int_key",
286289
),
287-
(
290+
pytest.param(
288291
# changes dtype GH#4463
289292
Series([False]),
290-
Series([np.nan]),
293+
Series([np.nan], dtype=object),
294+
# TODO: maybe go to float64 since we are changing the _whole_ Series?
291295
0,
296+
id="bool_series_int_key_change_all",
292297
),
293-
(
298+
pytest.param(
294299
# changes dtype GH#4463
295300
Series([False, True]),
296-
Series([np.nan, 1.0]),
301+
Series([np.nan, True], dtype=object),
297302
0,
303+
id="bool_series_int_key",
298304
),
299305
],
300306
)
@@ -307,45 +313,56 @@ class TestSetitemCastingEquivalents:
307313
- the setitem does not expand the obj
308314
"""
309315

310-
def test_int_key(self, obj, key, expected, indexer_sli):
316+
@pytest.fixture(params=[np.nan, np.float64("NaN")])
317+
def val(self, request):
318+
"""
319+
One python float NaN, one np.float64. Only np.float64 has a `dtype`
320+
attribute.
321+
"""
322+
return request.param
323+
324+
def test_int_key(self, obj, key, expected, val, indexer_sli):
311325
if not isinstance(key, int):
312326
return
313327

314328
obj = obj.copy()
315-
indexer_sli(obj)[key] = np.nan
329+
indexer_sli(obj)[key] = val
316330
tm.assert_series_equal(obj, expected)
317331

318-
def test_slice_key(self, obj, key, expected, indexer_si):
332+
def test_slice_key(self, obj, key, expected, val, indexer_si):
319333
# Note: no .loc because that handles slice edges differently
320334
obj = obj.copy()
321-
indexer_si(obj)[key] = np.nan
335+
indexer_si(obj)[key] = val
322336
tm.assert_series_equal(obj, expected)
323337

324-
def test_intlist_key(self, obj, key, expected, indexer_sli):
338+
def test_intlist_key(self, obj, key, expected, val, indexer_sli):
325339
ilkey = list(range(len(obj)))[key]
326340

327341
obj = obj.copy()
328-
indexer_sli(obj)[ilkey] = np.nan
342+
indexer_sli(obj)[ilkey] = val
329343
tm.assert_series_equal(obj, expected)
330344

331-
def test_mask_key(self, obj, key, expected, indexer_sli):
345+
def test_mask_key(self, obj, key, expected, val, indexer_sli):
332346
# setitem with boolean mask
333347
mask = np.zeros(obj.shape, dtype=bool)
334348
mask[key] = True
335349

336350
obj = obj.copy()
337-
indexer_sli(obj)[mask] = np.nan
351+
indexer_sli(obj)[mask] = val
338352
tm.assert_series_equal(obj, expected)
339353

340-
def test_series_where(self, obj, key, expected):
354+
def test_series_where(self, obj, key, expected, val):
341355
mask = np.zeros(obj.shape, dtype=bool)
342356
mask[key] = True
343357

344358
obj = obj.copy()
345-
res = obj.where(~mask, np.nan)
359+
res = obj.where(~mask, val)
346360
tm.assert_series_equal(res, expected)
347361

348-
def test_index_where(self, obj, key, expected, request):
362+
def test_index_where(self, obj, key, expected, val, request):
363+
if Index(obj).dtype != obj.dtype:
364+
pytest.skip("test not applicable for this dtype")
365+
349366
mask = np.zeros(obj.shape, dtype=bool)
350367
mask[key] = True
351368

@@ -355,15 +372,18 @@ def test_index_where(self, obj, key, expected, request):
355372
mark = pytest.mark.xfail(reason=msg)
356373
request.node.add_marker(mark)
357374

358-
res = Index(obj).where(~mask, np.nan)
375+
res = Index(obj).where(~mask, val)
359376
tm.assert_index_equal(res, Index(expected))
360377

361378
@pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
362-
def test_index_putmask(self, obj, key, expected):
379+
def test_index_putmask(self, obj, key, expected, val):
380+
if Index(obj).dtype != obj.dtype:
381+
pytest.skip("test not applicable for this dtype")
382+
363383
mask = np.zeros(obj.shape, dtype=bool)
364384
mask[key] = True
365385

366-
res = Index(obj).putmask(mask, np.nan)
386+
res = Index(obj).putmask(mask, val)
367387
tm.assert_index_equal(res, Index(expected))
368388

369389

0 commit comments

Comments
 (0)