Skip to content

Commit ea59c38

Browse files
fix failing test_factorize_equivalence
1 parent dbc8253 commit ea59c38

File tree

1 file changed

+10
-17
lines changed

1 file changed

+10
-17
lines changed

pandas/core/arrays/string_arrow.py

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
 36  36      is_integer_dtype,
 37  37      is_scalar,
 38  38  )
 39     -from pandas.core.algorithms import factorize
 40  39  from pandas.core.arraylike import OpsMixin
 41  40  from pandas.core.arrays.base import ExtensionArray
 42  41  from pandas.core.indexers import (
@@ -279,22 +278,16 @@ def __len__(self) -> int:
279 278
280 279      @doc(ExtensionArray.factorize)
281 280      def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]:
282     -        if self._data.num_chunks == 1:
283     -            encoded = self._data.chunk(0).dictionary_encode()
284     -            indices = encoded.indices.to_pandas()
285     -            if indices.dtype.kind == "f":
286     -                indices[np.isnan(indices)] = na_sentinel
287     -                indices = indices.astype(int)
288     -            if not is_int64_dtype(indices):
289     -                indices = indices.astype(np.int64)
290     -            return indices.values, type(self)(encoded.dictionary)
291     -        else:
292     -            np_array = self._data.to_pandas().values
293     -            # error: Incompatible return value type (got "Tuple[Any, Union[Any,
294     -            # Index]]", expected "Tuple[Any, ExtensionArray]")
295     -            return factorize( # type: ignore[return-value]
296     -                np_array, na_sentinel=na_sentinel
297     -            )
    281 +        encoded = self._data.dictionary_encode()
    282 +        indices = pa.chunked_array(
    283 +            [c.indices for c in encoded.chunks], type=encoded.type.index_type
    284 +        ).to_pandas()
    285 +        if indices.dtype.kind == "f":
    286 +            indices[np.isnan(indices)] = na_sentinel
    287 +            indices = indices.astype(int)
    288 +        if not is_int64_dtype(indices):
    289 +            indices = indices.astype(np.int64)
    290 +        return indices.values, type(self)(encoded.chunk(0).dictionary)
298 291
299 292      @classmethod
300 293      def _concat_same_type(cls, to_concat) -> ArrowStringArray:

0 commit comments

Comments
 (0)