From 9b0b313f1b06da72994af012fbf9c6dff49062b6 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 25 Aug 2021 15:20:20 -0700
Subject: [PATCH 1/2] REF: avoid need for get_join_target in join_non_unique

---
 pandas/core/indexes/base.py | 48 ++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8c32ccc1fa74c..d1ca4c8fff7e3 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4089,8 +4089,6 @@ def join(
         join_index, (left_indexer, right_indexer)
         """
         other = ensure_index(other)
-        self_is_mi = isinstance(self, ABCMultiIndex)
-        other_is_mi = isinstance(other, ABCMultiIndex)
 
         if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
             if (self.tz is None) ^ (other.tz is None):
@@ -4110,7 +4108,7 @@ def join(
 
         # try to figure out the join level
         # GH3662
-        if level is None and (self_is_mi or other_is_mi):
+        if level is None and (self._is_multi or other._is_multi):
 
             # have the same levels/names so a simple join
             if self.names == other.names:
@@ -4119,7 +4117,7 @@ def join(
                 return self._join_multi(other, how=how)
 
         # join on the level
-        if level is not None and (self_is_mi or other_is_mi):
+        if level is not None and (self._is_multi or other._is_multi):
             return self._join_level(other, level, how=how)
 
         if len(other) == 0 and how in ("left", "outer"):
@@ -4168,8 +4166,20 @@ def join(
                 try:
                     return self._join_monotonic(other, how=how)
                 except TypeError:
+                    # object dtype; non-comparable objects
                     pass
 
+        return self._join_via_get_indexer(other, how, sort)
+
+    @final
+    def _join_via_get_indexer(
+        self, other: Index, how: str_t, sort: bool
+    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
+        # Fallback if we do not have any fastpaths available based on
+        # uniqueness/monotonicity
+
+        # Note: at this point we have checked matching dtypes
+
         if how == "left":
             join_index = self
         elif how == "right":
@@ -4279,22 +4289,26 @@ def _join_non_unique(
         # We only get here if dtypes match
         assert self.dtype == other.dtype
 
-        lvalues = self._get_join_target()
-        rvalues = other._get_join_target()
-
         left_idx, right_idx = get_join_indexers(
-            [lvalues], [rvalues], how=how, sort=True
+            [self._values], [other._values], how=how, sort=True
         )
+        mask = left_idx == -1
 
-        left_idx = ensure_platform_int(left_idx)
-        right_idx = ensure_platform_int(right_idx)
+        # error: Argument 1 to "take" of "ExtensionArray" has incompatible
+        # type "ndarray[Any, dtype[signedinteger[Any]]]"; expected "Sequence[int]"
+        join_array = self._values.take(left_idx)  # type: ignore[arg-type]
+        # error: Argument 1 to "take" of "ExtensionArray" has incompatible type
+        # "ndarray[Any, dtype[signedinteger[Any]]]"; expected "Sequence[int]"
+        right = other._values.take(right_idx)  # type: ignore[arg-type]
 
-        join_array = np.asarray(lvalues.take(left_idx))
-        mask = left_idx == -1
-        np.putmask(join_array, mask, rvalues.take(right_idx))
+        # TODO: just use Index.putmask once GH#43212 is fixed
+        if isinstance(join_array, np.ndarray):
+            np.putmask(join_array, mask, right)
+        else:
+            # error: "ExtensionArray" has no attribute "putmask"
+            join_array.putmask(mask, right)  # type: ignore[attr-defined]
 
-        join_arraylike = self._from_join_target(join_array)
-        join_index = self._wrap_joined_index(join_arraylike, other)
+        join_index = self._wrap_joined_index(join_array, other)
 
         return join_index, left_idx, right_idx
 
@@ -4447,7 +4461,9 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
         return join_index, left_indexer, right_indexer
 
     @final
-    def _join_monotonic(self, other: Index, how: str_t = "left"):
+    def _join_monotonic(
+        self, other: Index, how: str_t = "left"
+    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
         # We only get here with matching dtypes
         assert other.dtype == self.dtype
 

From 845146e804be49b92d385fe1e59f2b37f9affa29 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 1 Sep 2021 21:01:22 -0700
Subject: [PATCH 2/2] remove outdated comment

---
 pandas/core/indexes/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 08dc1f372b804..35e07e864736c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4304,7 +4304,6 @@ def _join_non_unique(
         # "ndarray[Any, dtype[signedinteger[Any]]]"; expected "Sequence[int]"
         right = other._values.take(right_idx)  # type: ignore[arg-type]
 
-        # TODO: just use Index.putmask once GH#43212 is fixed
         if isinstance(join_array, np.ndarray):
             np.putmask(join_array, mask, right)
         else: