|
36 | 36 | is_integer_dtype,
|
37 | 37 | is_scalar,
|
38 | 38 | )
|
39 |
| -from pandas.core.algorithms import factorize |
40 | 39 | from pandas.core.arraylike import OpsMixin
|
41 | 40 | from pandas.core.arrays.base import ExtensionArray
|
42 | 41 | from pandas.core.indexers import (
|
@@ -279,22 +278,16 @@ def __len__(self) -> int:
|
279 | 278 |
|
280 | 279 | @doc(ExtensionArray.factorize)
|
281 | 280 | def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]:
|
282 |
| - if self._data.num_chunks == 1: |
283 |
| - encoded = self._data.chunk(0).dictionary_encode() |
284 |
| - indices = encoded.indices.to_pandas() |
285 |
| - if indices.dtype.kind == "f": |
286 |
| - indices[np.isnan(indices)] = na_sentinel |
287 |
| - indices = indices.astype(int) |
288 |
| - if not is_int64_dtype(indices): |
289 |
| - indices = indices.astype(np.int64) |
290 |
| - return indices.values, type(self)(encoded.dictionary) |
291 |
| - else: |
292 |
| - np_array = self._data.to_pandas().values |
293 |
| - # error: Incompatible return value type (got "Tuple[Any, Union[Any, |
294 |
| - # Index]]", expected "Tuple[Any, ExtensionArray]") |
295 |
| - return factorize( # type: ignore[return-value] |
296 |
| - np_array, na_sentinel=na_sentinel |
297 |
| - ) |
| 281 | + encoded = self._data.dictionary_encode() |
| 282 | + indices = pa.chunked_array( |
| 283 | + [c.indices for c in encoded.chunks], type=encoded.type.index_type |
| 284 | + ).to_pandas() |
| 285 | + if indices.dtype.kind == "f": |
| 286 | + indices[np.isnan(indices)] = na_sentinel |
| 287 | + indices = indices.astype(int) |
| 288 | + if not is_int64_dtype(indices): |
| 289 | + indices = indices.astype(np.int64) |
| 290 | + return indices.values, type(self)(encoded.chunk(0).dictionary) |
298 | 291 |
|
299 | 292 | @classmethod
|
300 | 293 | def _concat_same_type(cls, to_concat) -> ArrowStringArray:
|
|
0 commit comments