diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index d86c60d78195b..1827337b1b19a 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -190,6 +190,9 @@ def _from_factorized(cls, values, original): def _values_for_argsort(self) -> np.ndarray: return self._ndarray + def _values_for_factorize(self): + return self._ndarray, self._internal_fill_value + # Signature of "argmin" incompatible with supertype "ExtensionArray" def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override] # override base class by adding axis keyword diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9d649c533619e..eca7a205983ef 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2298,9 +2298,6 @@ def unique(self): unique_codes = unique1d(self.codes) return self._from_backing_data(unique_codes) - def _values_for_factorize(self): - return self._ndarray, -1 - def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: # make sure we have correct itemsize for resulting codes res_values = coerce_indexer_dtype(res_values, self.dtype.categories) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 93d32217ef322..ed19c77f3cf4b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -549,9 +549,6 @@ def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT: new_obj._freq = self.freq return new_obj - def _values_for_factorize(self): - return self._ndarray, self._internal_fill_value - # ------------------------------------------------------------------ # Validation Methods # TODO: try to de-duplicate these, ensure identical behavior diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 3d6e721ec273b..be7dc5e0ebdc6 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -189,8 +189,12 @@ def _validate_scalar(self, fill_value): fill_value = self.dtype.na_value return fill_value - def _values_for_factorize(self) -> tuple[np.ndarray, int]: - return self._ndarray, -1 + def _values_for_factorize(self) -> tuple[np.ndarray, float | None]: + if self.dtype.kind in ["i", "u", "b"]: + fv = None + else: + fv = np.nan + return self._ndarray, fv # ------------------------------------------------------------------------ # Reductions diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index beef5522ed1ab..c748d487a2f9c 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -309,3 +309,16 @@ def test_quantile_empty(dtype): result = arr._quantile(idx, interpolation="linear") expected = PandasArray(np.array([np.nan, np.nan])) tm.assert_extension_array_equal(result, expected) + + +def test_factorize_unsigned(): + # don't raise when calling factorize on unsigned int PandasArray + arr = np.array([1, 2, 3], dtype=np.uint64) + obj = PandasArray(arr) + + res_codes, res_unique = obj.factorize() + exp_codes, exp_unique = pd.factorize(arr) + + tm.assert_numpy_array_equal(res_codes, exp_codes) + + tm.assert_extension_array_equal(res_unique, PandasArray(exp_unique))