diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 02899bac14bb2..892fa83f98755 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -24,6 +24,7 @@
 )
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
+    ABCExtensionArray,
     ABCIndex,
     ABCMultiIndex,
     ABCSeries,
@@ -286,10 +287,17 @@ def hash_array(
     if is_categorical_dtype(dtype):
         vals = cast("Categorical", vals)
         return _hash_categorical(vals, encoding, hash_key)
-    elif not isinstance(vals, np.ndarray):
-        # i.e. ExtensionArray
+
+    elif isinstance(vals, ABCExtensionArray):
         vals, _ = vals._values_for_factorize()
 
+    elif not isinstance(vals, np.ndarray):
+        # GH#42003: neither ndarray nor ExtensionArray, so raise a clear TypeError
+        raise TypeError(
+            "hash_array requires np.ndarray or ExtensionArray, not "
+            f"{type(vals).__name__}. Use hash_pandas_object instead."
+        )
+
     return _hash_ndarray(vals, encoding, hash_key, categorize)
 
 
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index 6eee756f67a2e..c2977b81a9b4a 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -71,6 +71,15 @@ def test_hash_array_errors(val):
         hash_array(val)
 
 
+def test_hash_array_index_exception():
+    # GH#42003: passing an Index raises TypeError instead of AttributeError
+    obj = pd.DatetimeIndex(["2018-10-28 01:20:00"], tz="Europe/Berlin")
+
+    msg = "Use hash_pandas_object instead"
+    with pytest.raises(TypeError, match=msg):
+        hash_array(obj)
+
+
 def test_hash_tuples():
     tuples = [(1, "one"), (1, "two"), (2, "one")]
     result = hash_tuples(tuples)