diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index d54d1855ac2f8..2ab6b85fab074 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -271,7 +271,9 @@ def __getitem__( def fillna( self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None ) -> NDArrayBackedExtensionArrayT: - value, method = validate_fillna_kwargs(value, method) + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) mask = self.isna() value = missing.check_value_size(value, mask, len(self)) @@ -291,6 +293,10 @@ def fillna( new_values = self.copy() new_values[mask] = value else: + # We validate the fill_value even if there is nothing to fill + if value is not None: + self._validate_setitem_value(value) + new_values = self.copy() return new_values diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8c242e3800e48..a95caf74442c2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,10 +40,7 @@ cache_readonly, deprecate_kwarg, ) -from pandas.util._validators import ( - validate_bool_kwarg, - validate_fillna_kwargs, -) +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( coerce_indexer_dtype, @@ -102,7 +99,6 @@ sanitize_array, ) from pandas.core.indexers import deprecate_ndim_indexing -from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort from pandas.core.strings.object_array import ObjectStringArrayMixin @@ -1730,67 +1726,6 @@ def to_dense(self): ) return np.asarray(self) - def fillna(self, value=None, method=None, limit=None): - """ - Fill NA/NaN values using the specified method. - - Parameters - ---------- - value : scalar, dict, Series - If a scalar value is passed it is used to fill all missing values. - Alternatively, a Series or dict can be used to fill in different - values for each index. The value should not be a list. The - value(s) passed should either be in the categories or should be - NaN. - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - limit : int, default None - (Not implemented yet for Categorical!) - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. - - Returns - ------- - filled : Categorical with NA/NaN filled - """ - value, method = validate_fillna_kwargs( - value, method, validate_scalar_dict_value=False - ) - value = extract_array(value, extract_numpy=True) - - if value is None: - value = np.nan - if limit is not None: - raise NotImplementedError( - "specifying a limit for fillna has not been implemented yet" - ) - - if method is not None: - # pad / bfill - - # TODO: dispatch when self.categories is EA-dtype - values = np.asarray(self).reshape(-1, len(self)) - values = interpolate_2d(values, method, 0, None).astype( - self.categories.dtype - )[0] - codes = _get_codes_for_values(values, self.categories) - - else: - # We copy even if there is nothing to fill - codes = self._ndarray.copy() - mask = self.isna() - - new_codes = self._validate_setitem_value(value) - np.putmask(codes, mask, new_codes) - - return self._from_backing_data(codes) - # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 869836a3da70c..1583144702c72 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -369,7 +369,15 @@ def __contains__(self, key: Any) -> bool: @doc(Index.fillna) def fillna(self, value, downcast=None): value = self._require_scalar(value) - cat = self._data.fillna(value) + try: + cat = self._data.fillna(value) + except (ValueError, TypeError): + # invalid fill_value + if not self.isna().any(): + # nothing to fill, we can get away without casting + return self.copy() + return self.astype(object).fillna(value, downcast=downcast) + return type(self)._simple_new(cat, name=self.name) @doc(Index.unique) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 718ef087e47d3..bc1f499a70aa0 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -304,16 +304,6 @@ class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): class TestMissing(BaseNumPyTests, base.BaseMissingTests): - @skip_nested - def test_fillna_scalar(self, data_missing): - # Non-scalar "scalar" values. - super().test_fillna_scalar(data_missing) - - @skip_nested - def test_fillna_no_op_returns_copy(self, data): - # Non-scalar "scalar" values. - super().test_fillna_no_op_returns_copy(data) - @skip_nested def test_fillna_series(self, data_missing): # Non-scalar "scalar" values. diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py index c8fc55c29054e..817e996f49162 100644 --- a/pandas/tests/indexes/categorical/test_fillna.py +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -13,10 +13,16 @@ def test_fillna_categorical(self): exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") tm.assert_index_equal(idx.fillna(1.0), exp) - # fill by value not in categories raises ValueError + cat = idx._data + + # fill by value not in categories raises ValueError on EA, casts on CI msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): - idx.fillna(2.0) + cat.fillna(2.0) + + result = idx.fillna(2.0) + expected = idx.astype(object).fillna(2.0) + tm.assert_index_equal(result, expected) def test_fillna_copies_with_no_nas(self): # Nothing to fill, should still get a copy @@ -37,8 +43,9 @@ def test_fillna_validates_with_no_nas(self): cat = ci._data msg = "Cannot setitem on a Categorical with a new category" - with pytest.raises(ValueError, match=msg): - ci.fillna(False) + res = ci.fillna(False) + # nothing to fill, so we dont cast + tm.assert_index_equal(res, ci) # Same check directly on the Categorical with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 5b3a6c13af467..cf6b357d0a418 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -671,13 +671,15 @@ def test_fillna_categorical_with_new_categories(self, fill_value, expected_outpu def test_fillna_categorical_raises(self): data = ["a", np.nan, "b", np.nan, np.nan] ser = Series(Categorical(data, categories=["a", "b"])) + cat = ser._values msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): ser.fillna("d") - with pytest.raises(ValueError, match=msg): - ser.fillna(Series("d")) + msg2 = "Length of 'value' does not match." + with pytest.raises(ValueError, match=msg2): + cat.fillna(Series("d")) with pytest.raises(ValueError, match=msg): ser.fillna({1: "d", 3: "a"})