Patch summary (free text placed ahead of the first "diff --git" header).

Purpose: treat NA as missing when the "mode.use_inf_as_na" option is enabled
(pandas issue 33655).

Files touched:
  * doc/source/whatsnew/v1.1.0.rst
      Adds the changelog entry for StringArray.isna and removes one blank
      line ahead of the "Other" heading.
  * pandas/_libs/missing.pyx
      Removes the _check_none_nan_inf_neginf helper. isnaobj_old now marks an
      element as missing when checknull(val) is true or val equals INF or
      NEGINF, and its docstring lists NA.
  * pandas/core/dtypes/missing.py
      Imports is_categorical_dtype. In _isna_ndarraylike, an extension array
      goes through libmissing.isnaobj_old(values.to_numpy()) only when
      inf_as_na is set and the dtype is categorical; every other extension
      array uses values.isna().
  * pandas/tests/arrays/string_/test_string.py
      Adds test_use_inf_as_na, which checks isna() on the array, a Series and
      a DataFrame with the option enabled.
  * pandas/tests/extension/base/missing.py
      Adds test_use_inf_as_na_no_effect, which compares Series.isna() with
      and without the option.
  * pandas/tests/series/test_missing.py
      Extends test_isna_for_inf with a pd.NA element.

NOTE(review): indentation inside the hunks was rebuilt from a
whitespace-collapsed copy of this patch; confirm it against the target files
before applying.

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 84ad478226175..c14bcb6c6d8c5 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -796,13 +796,13 @@ ExtensionArray
 ^^^^^^^^^^^^^^
 
 - Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
+- Fixed bug where :meth:`StringArray.isna` would return ``False`` for NA values when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33655`)
 - Fixed bug in :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`)
 - Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
 - Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
 - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
 - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`)
 
-
 Other
 ^^^^^
 - Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True``
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index 62a8bfccaa432..6bca5e370ac89 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -90,11 +90,6 @@ cpdef bint checknull_old(object val):
     return False
 
 
-cdef inline bint _check_none_nan_inf_neginf(object val):
-    return val is None or (isinstance(val, float) and
-                           (val != val or val == INF or val == NEGINF))
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cpdef ndarray[uint8_t] isnaobj(ndarray arr):
@@ -141,6 +136,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
      - INF
      - NEGINF
      - NaT
+     - NA
 
     Parameters
     ----------
@@ -161,7 +157,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
     result = np.zeros(n, dtype=np.uint8)
     for i in range(n):
         val = arr[i]
-        result[i] = val is NaT or _check_none_nan_inf_neginf(val)
+        result[i] = checknull(val) or val == INF or val == NEGINF
     return result.view(np.bool_)
 
 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 048fd7adf55b1..ab8df492f1c01 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -17,6 +17,7 @@
     TD64NS_DTYPE,
     ensure_object,
     is_bool_dtype,
+    is_categorical_dtype,
     is_complex_dtype,
     is_datetimelike_v_numeric,
     is_dtype_equal,
@@ -209,8 +210,8 @@ def _isna_ndarraylike(obj, inf_as_na: bool = False):
     dtype = values.dtype
 
     if is_extension_array_dtype(dtype):
-        if inf_as_na:
-            result = values.isna() | (values == -np.inf) | (values == np.inf)
+        if inf_as_na and is_categorical_dtype(dtype):
+            result = libmissing.isnaobj_old(values.to_numpy())
         else:
             result = values.isna()
     elif is_string_dtype(dtype):
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b681abf03a2b3..6f9a1a5be4c43 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -305,6 +305,28 @@ def test_value_counts_na():
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "values, expected",
+    [
+        (pd.array(["a", "b", "c"]), np.array([False, False, False])),
+        (pd.array(["a", "b", None]), np.array([False, False, True])),
+    ],
+)
+def test_use_inf_as_na(values, expected):
+    # https://github.com/pandas-dev/pandas/issues/33655
+    with pd.option_context("mode.use_inf_as_na", True):
+        result = values.isna()
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = pd.Series(values).isna()
+        expected = pd.Series(expected)
+        tm.assert_series_equal(result, expected)
+
+        result = pd.DataFrame(values).isna()
+        expected = pd.DataFrame(expected)
+        tm.assert_frame_equal(result, expected)
+
+
 def test_memory_usage():
     # GH 33963
     series = pd.Series(["a", "b", "c"], dtype="string")
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 2393d2edcd2c6..a5969ef961bab 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -127,3 +127,10 @@ def test_fillna_fill_other(self, data):
         expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})
 
         self.assert_frame_equal(result, expected)
+
+    def test_use_inf_as_na_no_effect(self, data_missing):
+        ser = pd.Series(data_missing)
+        expected = ser.isna()
+        with pd.option_context("mode.use_inf_as_na", True):
+            result = ser.isna()
+        self.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 3b8c2e595148e..162778e372426 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -509,12 +509,12 @@ def test_fillna_nat(self):
         tm.assert_frame_equal(filled2, expected)
 
     def test_isna_for_inf(self):
-        s = Series(["a", np.inf, np.nan, 1.0])
+        s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
         with pd.option_context("mode.use_inf_as_na", True):
             r = s.isna()
             dr = s.dropna()
-        e = Series([False, True, True, False])
-        de = Series(["a", 1.0], index=[0, 3])
+        e = Series([False, True, True, True, False])
+        de = Series(["a", 1.0], index=[0, 4])
         tm.assert_series_equal(r, e)
         tm.assert_series_equal(dr, de)
 