From af72f4c15cc9beed3d93e38df29aec31960be453 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 15:59:22 -0500
Subject: [PATCH 01/24] Improve merge performance

---
 asv_bench/benchmarks/join_merge.py         | 114 ++++-----------------
 pandas/_libs/hashtable_class_helper.pxi.in |  32 ++++++
 pandas/core/indexes/base.py                |  22 ++++
 pandas/core/reshape/merge.py               |   2 +
 4 files changed, 75 insertions(+), 95 deletions(-)

diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index ce64304731116..b5a1007b566c9 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -20,101 +20,6 @@
     from pandas import ordered_merge as merge_ordered
 
 
-class Concat:
-    params = [0, 1]
-    param_names = ["axis"]
-
-    def setup(self, axis):
-        N = 1000
-        s = Series(N, index=Index([f"i-{i}" for i in range(N)], dtype=object))
-        self.series = [s[i:-i] for i in range(1, 10)] * 50
-        self.small_frames = [DataFrame(np.random.randn(5, 4))] * 1000
-        df = DataFrame(
-            {"A": range(N)}, index=date_range("20130101", periods=N, freq="s")
-        )
-        self.empty_left = [DataFrame(), df]
-        self.empty_right = [df, DataFrame()]
-        self.mixed_ndims = [df, df.head(N // 2)]
-
-    def time_concat_series(self, axis):
-        concat(self.series, axis=axis, sort=False)
-
-    def time_concat_small_frames(self, axis):
-        concat(self.small_frames, axis=axis)
-
-    def time_concat_empty_right(self, axis):
-        concat(self.empty_right, axis=axis)
-
-    def time_concat_empty_left(self, axis):
-        concat(self.empty_left, axis=axis)
-
-    def time_concat_mixed_ndims(self, axis):
-        concat(self.mixed_ndims, axis=axis)
-
-
-class ConcatDataFrames:
-    params = ([0, 1], [True, False])
-    param_names = ["axis", "ignore_index"]
-
-    def setup(self, axis, ignore_index):
-        frame_c = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="C"))
-        self.frame_c = [frame_c] * 20
-        frame_f = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="F"))
-        self.frame_f = [frame_f] * 20
-
-    def time_c_ordered(self, axis, ignore_index):
-        concat(self.frame_c, axis=axis, ignore_index=ignore_index)
-
-    def time_f_ordered(self, axis, ignore_index):
-        concat(self.frame_f, axis=axis, ignore_index=ignore_index)
-
-
-class ConcatIndexDtype:
-    params = (
-        [
-            "datetime64[ns]",
-            "int64",
-            "Int64",
-            "int64[pyarrow]",
-            "string[python]",
-            "string[pyarrow]",
-        ],
-        ["monotonic", "non_monotonic", "has_na"],
-        [0, 1],
-        [True, False],
-    )
-    param_names = ["dtype", "structure", "axis", "sort"]
-
-    def setup(self, dtype, structure, axis, sort):
-        N = 10_000
-        if dtype == "datetime64[ns]":
-            vals = date_range("1970-01-01", periods=N)
-        elif dtype in ("int64", "Int64", "int64[pyarrow]"):
-            vals = np.arange(N, dtype=np.int64)
-        elif dtype in ("string[python]", "string[pyarrow]"):
-            vals = Index([f"i-{i}" for i in range(N)], dtype=object)
-        else:
-            raise NotImplementedError
-
-        idx = Index(vals, dtype=dtype)
-
-        if structure == "monotonic":
-            idx = idx.sort_values()
-        elif structure == "non_monotonic":
-            idx = idx[::-1]
-        elif structure == "has_na":
-            if not idx._can_hold_na:
-                raise NotImplementedError
-            idx = Index([None], dtype=dtype).append(idx)
-        else:
-            raise NotImplementedError
-
-        self.series = [Series(i, idx[:-i]) for i in range(1, 6)]
-
-    def time_concat_series(self, dtype, structure, axis, sort):
-        concat(self.series, axis=axis, sort=sort)
-
-
 class Join:
     params = [True, False]
     param_names = ["sort"]
@@ -328,6 +233,25 @@ def time_i8merge(self, how):
         merge(self.left, self.right, how=how)
 
 
+class UniqueMerge:
+    params = [["left", "right"], [4_000_000, 1_000_000]]
+    param_names = ["unique", "unique_elements"]
+
+    def setup(self, unique, unique_elements):
+        N = 1_000_000
+        self.left = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
+        self.right = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
+        uniques = self.right.a.drop_duplicates()
+        self.right["a"] = concat(
+            [uniques, Series(np.arange(0, -(N - len(uniques)), -1))], ignore_index=True
+        )
+        if unique == "left":
+            self.left, self.right = self.right, self.left
+
+    def time_unique_merge(self, unique, unique_elements):
+        merge(self.left, self.right, how="inner")
+
+
 class MergeDatetime:
     params = [
         [
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 629b6b42db852..ae2932a526ce3 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -545,6 +545,38 @@ cdef class {{name}}HashTable(HashTable):
                     self.table.vals[k] = i
         self.na_position = na_position
 
+    @cython.boundscheck(False)
+    def inner_join_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
+        cdef:
+            Py_ssize_t i, n = len(values)
+            int ret = 0, ctr = 0
+            {{c_type}} val
+            khiter_t k
+            intp_t[::1] locs = np.empty(n, dtype=np.intp)
+            intp_t[::1] self_locs = np.empty(n, dtype=np.intp)
+            int8_t na_position = self.na_position
+
+        if self.uses_mask and mask is None:
+            raise NotImplementedError  # pragma: no cover
+
+        with nogil:
+            for i in range(n):
+                if self.uses_mask and mask[i]:
+                    if self.na_position == -1:
+                        continue
+                    self_locs[ctr] = na_position
+                    locs[ctr] = na_position
+                    ctr += 1
+                else:
+                    val = {{to_c_type}}(values[i])
+                    k = kh_get_{{dtype}}(self.table, val)
+                    if k != self.table.n_buckets:
+                        self_locs[ctr] = self.table.vals[k]
+                        locs[ctr] = i
+                        ctr += 1
+
+        return np.asarray(self_locs)[:ctr], np.asarray(locs)[:ctr]
+
     @cython.boundscheck(False)
     def lookup(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> ndarray:
         # -> np.ndarray[np.intp]
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3c01778e05f3d..bbcdac90256d7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4698,6 +4698,28 @@ def _can_use_libjoin(self) -> bool:
         #  Doing so seems to break test_concat_datetime_timezone
         return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
 
+    @final
+    def _can_use_unique_join(self, other: Index, how, sort: bool) -> bool:
+        if how != "inner":
+            return False
+        if sort:
+            # Not worth it if we're going to sort the result
+            return False
+
+        if self.dtype.kind in "iufb" or self.dtype == "O":
+            return other.is_unique or self.is_unique
+        return False
+
+    def _unique_join(self, other: Index) -> tuple[np.ndarray, np.ndarray]:
+        if other.is_unique:
+            ridx, lidx = other._engine.mapping.inner_join_unique(self._values)
+        else:
+            lidx, ridx = self._engine.mapping.inner_join_unique(other._values)
+            indexer = np.argsort(lidx)
+            lidx = lidx[indexer]
+            ridx = ridx[indexer]
+        return lidx, ridx
+
     # --------------------------------------------------------------------
     # Uncategorized Methods
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 8ea2ac24e13c8..b3e93e419cb9b 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1745,6 +1745,8 @@ def get_join_indexers(
         and (left.is_unique or right.is_unique)
     ):
         _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
+    elif left._can_use_unique_join(right, how, sort):
+        lidx, ridx = left._unique_join(right)
     else:
         lidx, ridx = get_join_indexers_non_unique(
             left._values, right._values, sort, how

From 6bad79eceecd387d4ab42298dbe62f2ac75a15e7 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 16:07:18 -0500
Subject: [PATCH 02/24] Restrict to other unique

---
 pandas/core/indexes/base.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index bbcdac90256d7..287e3747c9e0c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4707,17 +4707,11 @@ def _can_use_unique_join(self, other: Index, how, sort: bool) -> bool:
             return False
 
         if self.dtype.kind in "iufb" or self.dtype == "O":
-            return other.is_unique or self.is_unique
+            return other.is_unique
         return False
 
     def _unique_join(self, other: Index) -> tuple[np.ndarray, np.ndarray]:
-        if other.is_unique:
-            ridx, lidx = other._engine.mapping.inner_join_unique(self._values)
-        else:
-            lidx, ridx = self._engine.mapping.inner_join_unique(other._values)
-            indexer = np.argsort(lidx)
-            lidx = lidx[indexer]
-            ridx = ridx[indexer]
+        ridx, lidx = other._engine.mapping.inner_join_unique(self._values)
         return lidx, ridx
 
     # --------------------------------------------------------------------

From 8f162eba07104f455fe06f6da24e44546c2d3e3a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 16:43:14 -0500
Subject: [PATCH 03/24] Fixup

---
 asv_bench/benchmarks/join_merge.py | 10 ++++------
 pandas/core/indexes/base.py        |  4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index b5a1007b566c9..03c0f92d766c6 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -234,10 +234,10 @@ def time_i8merge(self, how):
 
 
 class UniqueMerge:
-    params = [["left", "right"], [4_000_000, 1_000_000]]
-    param_names = ["unique", "unique_elements"]
+    params = [4_000_000, 1_000_000]
+    param_names = ["unique_elements"]
 
-    def setup(self, unique, unique_elements):
+    def setup(self, unique_elements):
         N = 1_000_000
         self.left = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
         self.right = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
@@ -245,10 +245,8 @@ def setup(self, unique, unique_elements):
         self.right["a"] = concat(
             [uniques, Series(np.arange(0, -(N - len(uniques)), -1))], ignore_index=True
         )
-        if unique == "left":
-            self.left, self.right = self.right, self.left
 
-    def time_unique_merge(self, unique, unique_elements):
+    def time_unique_merge(self, unique_elements):
         merge(self.left, self.right, how="inner")
 
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 287e3747c9e0c..f5f9bb1830c88 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4706,8 +4706,8 @@ def _can_use_unique_join(self, other: Index, how, sort: bool) -> bool:
             # Not worth it if we're going to sort the result
             return False
 
-        if self.dtype.kind in "iufb" or self.dtype == "O":
-            return other.is_unique
+        if self.dtype.kind in "iufb":
+            return other.is_unique and other._engine.is_mapping_populated
         return False
 
     def _unique_join(self, other: Index) -> tuple[np.ndarray, np.ndarray]:

From 2a52d2ac96bdd97fd5c5ca19906fc9d997e6aac4 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 17:26:35 -0500
Subject: [PATCH 04/24] Rewrite unique computation

---
 pandas/_libs/hashtable.pyx                 |  4 +-
 pandas/_libs/hashtable_class_helper.pxi.in | 47 +++++++++++++++++++++-
 pandas/core/indexes/base.py                | 16 --------
 pandas/core/reshape/merge.py               | 35 +++++++++++-----
 4 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 8250d0242c31f..d78dfef41095e 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -75,7 +75,7 @@ cdef class Factorizer:
     def get_count(self) -> int:
         return self.count
 
-    def factorize(self, values, na_sentinel=-1, na_value=None, mask=None) -> np.ndarray:
+    def factorize(self, values, mask=None, na_sentinel=-1, na_value=None) -> np.ndarray:
         raise NotImplementedError
 
 
@@ -89,7 +89,7 @@ cdef class ObjectFactorizer(Factorizer):
         self.uniques = ObjectVector()
 
     def factorize(
-        self, ndarray[object] values, na_sentinel=-1, na_value=None, mask=None
+        self, ndarray[object] values, mask=None, na_sentinel=-1, na_value=None
     ) -> np.ndarray:
         """
 
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index ae2932a526ce3..7dbf4027f448d 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -545,6 +545,49 @@ cdef class {{name}}HashTable(HashTable):
                     self.table.vals[k] = i
         self.na_position = na_position
 
+    @cython.boundscheck(False)
+    def is_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> bool:
+        cdef:
+            Py_ssize_t i, n = len(values)
+            int ret = 0
+            {{c_type}} val
+            khiter_t k
+            int8_t na_position = self.na_position
+
+        if self.uses_mask and mask is None:
+            raise NotImplementedError  # pragma: no cover
+
+        with nogil:
+            if self.uses_mask:
+                for i in range(n):
+                    if mask[i]:
+                        if na_position != -1:
+                            i = 0
+                            break
+                        na_position = i
+                    else:
+                        val = {{to_c_type}}(values[i])
+                        k = kh_get_{{dtype}}(self.table, val)
+                        if k != self.table.n_buckets:
+                            i = 0
+                            break
+                        k = kh_put_{{dtype}}(self.table, val, &ret)
+                        self.table.vals[k] = i
+            else:
+                for i in range(n):
+                    val = {{to_c_type}}(values[i])
+                    k = kh_get_{{dtype}}(self.table, val)
+                    if k != self.table.n_buckets:
+                        i = 0
+                        break
+                    k = kh_put_{{dtype}}(self.table, val, &ret)
+                    self.table.vals[k] = i
+        self.na_position = na_position
+        if i == 0:
+            return False
+        else:
+            return True
+
     @cython.boundscheck(False)
     def inner_join_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
         cdef:
@@ -903,8 +946,8 @@ cdef class {{name}}Factorizer(Factorizer):
         self.table = {{name}}HashTable(size_hint)
         self.uniques = {{name}}Vector()
 
-    def factorize(self, const {{c_type}}[:] values,
-                  na_sentinel=-1, na_value=None, object mask=None) -> np.ndarray:
+    def factorize(self, const {{c_type}}[:] values, object mask=None,
+                  na_sentinel=-1, na_value=None) -> np.ndarray:
         """
         Returns
         -------
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f5f9bb1830c88..3c01778e05f3d 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4698,22 +4698,6 @@ def _can_use_libjoin(self) -> bool:
         #  Doing so seems to break test_concat_datetime_timezone
         return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
 
-    @final
-    def _can_use_unique_join(self, other: Index, how, sort: bool) -> bool:
-        if how != "inner":
-            return False
-        if sort:
-            # Not worth it if we're going to sort the result
-            return False
-
-        if self.dtype.kind in "iufb":
-            return other.is_unique and other._engine.is_mapping_populated
-        return False
-
-    def _unique_join(self, other: Index) -> tuple[np.ndarray, np.ndarray]:
-        ridx, lidx = other._engine.mapping.inner_join_unique(self._values)
-        return lidx, ridx
-
     # --------------------------------------------------------------------
     # Uncategorized Methods
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index b3e93e419cb9b..4728a3fee6baf 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1739,15 +1739,24 @@ def get_join_indexers(
     left = Index(lkey)
     right = Index(rkey)
 
+    computed = False
+
     if (
         left.is_monotonic_increasing
         and right.is_monotonic_increasing
         and (left.is_unique or right.is_unique)
     ):
         _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
-    elif left._can_use_unique_join(right, how, sort):
-        lidx, ridx = left._unique_join(right)
-    else:
+        computed = True
+    elif how == "inner" and not sort and right.dtype.kind in "iufb":
+        arr, mask = _convert_arrays_for_rizer(rkey)
+        htable, arr = algos._get_hashtable_algo(arr)
+        tbl = htable()
+        if tbl.is_unique(arr, mask):
+            ridx, lidx = tbl.inner_join_unique(*_convert_arrays_for_rizer(lkey))
+            computed = True
+
+    if not computed:
         lidx, ridx = get_join_indexers_non_unique(
             left._values, right._values, sort, how
         )
@@ -2533,18 +2542,14 @@ def _factorize_keys(
 
     if isinstance(lk, BaseMaskedArray):
         assert isinstance(rk, BaseMaskedArray)
-        llab = rizer.factorize(lk._data, mask=lk._mask)
-        rlab = rizer.factorize(rk._data, mask=rk._mask)
+        llab = rizer.factorize(*_convert_arrays_for_rizer(lk))
+        rlab = rizer.factorize(*_convert_arrays_for_rizer(rk))
     elif isinstance(lk, ArrowExtensionArray):
         assert isinstance(rk, ArrowExtensionArray)
         # we can only get here with numeric dtypes
         # TODO: Remove when we have a Factorizer for Arrow
-        llab = rizer.factorize(
-            lk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=lk.isna()
-        )
-        rlab = rizer.factorize(
-            rk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=rk.isna()
-        )
+        llab = rizer.factorize(*_convert_arrays_for_rizer(lk))
+        rlab = rizer.factorize(*_convert_arrays_for_rizer(rk))
     else:
         # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
         # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
@@ -2576,6 +2581,14 @@ def _factorize_keys(
     return llab, rlab, count
 
 
+def _convert_arrays_for_rizer(arr):
+    if isinstance(arr, BaseMaskedArray):
+        return arr._data, arr._mask
+    elif isinstance(arr, ArrowExtensionArray):
+        return arr.to_numpy(na_value=1, dtype=arr.dtype.numpy_dtype), arr.isna()
+    return arr, None
+
+
 def _convert_arrays_and_get_rizer_klass(
     lk: ArrayLike, rk: ArrayLike
 ) -> tuple[type[libhashtable.Factorizer], ArrayLike, ArrayLike]:

From 38018950b124a601cb5471c15fa65b2a47053d20 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 17:32:53 -0500
Subject: [PATCH 05/24] Improve performance

---
 pandas/_libs/hashtable_class_helper.pxi.in | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 7dbf4027f448d..2012b21061f18 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -567,21 +567,19 @@ cdef class {{name}}HashTable(HashTable):
                         na_position = i
                     else:
                         val = {{to_c_type}}(values[i])
-                        k = kh_get_{{dtype}}(self.table, val)
-                        if k != self.table.n_buckets:
-                            i = 0
-                            break
                         k = kh_put_{{dtype}}(self.table, val, &ret)
                         self.table.vals[k] = i
             else:
                 for i in range(n):
                     val = {{to_c_type}}(values[i])
-                    k = kh_get_{{dtype}}(self.table, val)
-                    if k != self.table.n_buckets:
-                        i = 0
-                        break
                     k = kh_put_{{dtype}}(self.table, val, &ret)
                     self.table.vals[k] = i
+
+                    if i % 10_000 == 0:
+                        if self.table.size != i + 1:
+                            i = 0
+                            break
+
         self.na_position = na_position
         if i == 0:
             return False

From 223f136e6c99d3326b6278eff0857eee8f6618a5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 17:35:05 -0500
Subject: [PATCH 06/24] Improve performance

---
 pandas/_libs/hashtable_class_helper.pxi.in | 5 -----
 pandas/core/reshape/merge.py               | 3 ++-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 2012b21061f18..def8bf576a490 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -575,11 +575,6 @@ cdef class {{name}}HashTable(HashTable):
                     k = kh_put_{{dtype}}(self.table, val, &ret)
                     self.table.vals[k] = i
 
-                    if i % 10_000 == 0:
-                        if self.table.size != i + 1:
-                            i = 0
-                            break
-
         self.na_position = na_position
         if i == 0:
             return False
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4728a3fee6baf..3bb0fcebecafb 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1752,7 +1752,8 @@ def get_join_indexers(
         arr, mask = _convert_arrays_for_rizer(rkey)
         htable, arr = algos._get_hashtable_algo(arr)
         tbl = htable()
-        if tbl.is_unique(arr, mask):
+        tbl.is_unique(arr, mask)
+        if len(tbl) == len(arr):
             ridx, lidx = tbl.inner_join_unique(*_convert_arrays_for_rizer(lkey))
             computed = True
 

From 1a3bf1b15dc7535dcffd8599804a5400f3a27a96 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 17:40:07 -0500
Subject: [PATCH 07/24] Improve performance

---
 pandas/_libs/hashtable_class_helper.pxi.in | 5 +++++
 pandas/core/reshape/merge.py               | 3 +--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index def8bf576a490..2012b21061f18 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -575,6 +575,11 @@ cdef class {{name}}HashTable(HashTable):
                     k = kh_put_{{dtype}}(self.table, val, &ret)
                     self.table.vals[k] = i
 
+                    if i % 10_000 == 0:
+                        if self.table.size != i + 1:
+                            i = 0
+                            break
+
         self.na_position = na_position
         if i == 0:
             return False
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 3bb0fcebecafb..4728a3fee6baf 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1752,8 +1752,7 @@ def get_join_indexers(
         arr, mask = _convert_arrays_for_rizer(rkey)
         htable, arr = algos._get_hashtable_algo(arr)
         tbl = htable()
-        tbl.is_unique(arr, mask)
-        if len(tbl) == len(arr):
+        if tbl.is_unique(arr, mask):
             ridx, lidx = tbl.inner_join_unique(*_convert_arrays_for_rizer(lkey))
             computed = True
 

From c0f3532b8609fb71674bce4c1072e511feef5b1a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 17:47:44 -0500
Subject: [PATCH 08/24] Improve performance

---
 pandas/core/reshape/merge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4728a3fee6baf..b94579a370769 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1751,7 +1751,7 @@ def get_join_indexers(
     elif how == "inner" and not sort and right.dtype.kind in "iufb":
         arr, mask = _convert_arrays_for_rizer(rkey)
         htable, arr = algos._get_hashtable_algo(arr)
-        tbl = htable()
+        tbl = htable(len(arr))
         if tbl.is_unique(arr, mask):
             ridx, lidx = tbl.inner_join_unique(*_convert_arrays_for_rizer(lkey))
             computed = True

From 80a1cf8e332b29a011125c060b0b13762c183bb1 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 21 Mar 2024 18:22:42 -0500
Subject: [PATCH 09/24] Integrate better

---
 pandas/_libs/hashtable_class_helper.pxi.in |  3 ++
 pandas/core/reshape/merge.py               | 47 +++++++++++++---------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 2012b21061f18..de36c353a1d17 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -972,6 +972,9 @@ cdef class {{name}}Factorizer(Factorizer):
         self.count = len(self.uniques)
         return labels
 
+    def inner_join_unique(self, const {{c_type}}[:] values, const uint8_t[:] mask = None) -> tuple[np.ndarray, np.ndarray]:
+        return self.table.inner_join_unique(values, mask)
+
 {{endfor}}
 
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index b94579a370769..cd6afdc2d84c0 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1739,24 +1739,13 @@ def get_join_indexers(
     left = Index(lkey)
     right = Index(rkey)
 
-    computed = False
-
     if (
         left.is_monotonic_increasing
         and right.is_monotonic_increasing
         and (left.is_unique or right.is_unique)
     ):
         _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
-        computed = True
-    elif how == "inner" and not sort and right.dtype.kind in "iufb":
-        arr, mask = _convert_arrays_for_rizer(rkey)
-        htable, arr = algos._get_hashtable_algo(arr)
-        tbl = htable(len(arr))
-        if tbl.is_unique(arr, mask):
-            ridx, lidx = tbl.inner_join_unique(*_convert_arrays_for_rizer(lkey))
-            computed = True
-
-    if not computed:
+    else:
         lidx, ridx = get_join_indexers_non_unique(
             left._values, right._values, sort, how
         )
@@ -1791,7 +1780,12 @@ def get_join_indexers_non_unique(
     np.ndarray[np.intp]
         Indexer into right.
     """
-    lkey, rkey, count = _factorize_keys(left, right, sort=sort)
+    lkey, rkey, count = _factorize_keys(
+        left, right, sort=sort, how=how, unique_merge=True
+    )
+    if count == -1:
+        # unique merge
+        return lkey, rkey
     if how == "left":
         lidx, ridx = libjoin.left_outer_join(lkey, rkey, count, sort=sort)
     elif how == "right":
@@ -2396,7 +2390,11 @@ def _left_join_on_index(
 
 
 def _factorize_keys(
-    lk: ArrayLike, rk: ArrayLike, sort: bool = True
+    lk: ArrayLike,
+    rk: ArrayLike,
+    sort: bool = True,
+    how: str = "inner",
+    unique_merge: bool = False,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
     """
     Encode left and right keys as enumerated types.
@@ -2542,20 +2540,31 @@ def _factorize_keys(
 
     if isinstance(lk, BaseMaskedArray):
         assert isinstance(rk, BaseMaskedArray)
-        llab = rizer.factorize(*_convert_arrays_for_rizer(lk))
-        rlab = rizer.factorize(*_convert_arrays_for_rizer(rk))
+        lk_data = lk._data
+        lk_mask = lk._mask
+        rlab = rizer.factorize(rk._data, mask=rk._mask)
     elif isinstance(lk, ArrowExtensionArray):
         assert isinstance(rk, ArrowExtensionArray)
         # we can only get here with numeric dtypes
         # TODO: Remove when we have a Factorizer for Arrow
-        llab = rizer.factorize(*_convert_arrays_for_rizer(lk))
-        rlab = rizer.factorize(*_convert_arrays_for_rizer(rk))
+        lk_data = lk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype)
+        lk_mask = lk.isna()
+        rlab = rizer.factorize(
+            rk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=rk.isna()
+        )
     else:
         # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
         # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
         # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
-        llab = rizer.factorize(lk)  # type: ignore[arg-type]
+        lk_data = lk
+        lk_mask = None
         rlab = rizer.factorize(rk)  # type: ignore[arg-type]
+
+    if not sort and how == "inner" and rizer.get_count() == len(rlab) and unique_merge:
+        ridx, lidx = rizer.inner_join_unique(lk_data, lk_mask)
+        return lidx, ridx, -1
+
+    llab = rizer.factorize(lk_data, mask=lk_mask)
     assert llab.dtype == np.dtype(np.intp), llab.dtype
     assert rlab.dtype == np.dtype(np.intp), rlab.dtype
 

From b2d11e305dc7ad4aa7d73bc878e2866922ddd312 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 16:35:40 -0500
Subject: [PATCH 10/24] Fix bugs

---
 pandas/_libs/hashtable.pyi                 |  2 +-
 pandas/_libs/hashtable.pyx                 |  4 +-
 pandas/_libs/hashtable_class_helper.pxi.in | 45 +---------------------
 pandas/core/reshape/merge.py               | 43 ++++++++++++---------
 pandas/tests/reshape/merge/test_merge.py   | 18 ++++-----
 5 files changed, 38 insertions(+), 74 deletions(-)

diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi
index 3725bfa3362d9..d3fd033935112 100644
--- a/pandas/_libs/hashtable.pyi
+++ b/pandas/_libs/hashtable.pyi
@@ -16,7 +16,7 @@ def unique_label_indices(
 class Factorizer:
     count: int
     uniques: Any
-    def __init__(self, size_hint: int) -> None: ...
+    def __init__(self, size_hint: int, uses_mask: bool = False) -> None: ...
     def get_count(self) -> int: ...
     def factorize(
         self,
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index d78dfef41095e..139d86bd4f268 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -69,7 +69,7 @@ cdef class Factorizer:
     cdef readonly:
         Py_ssize_t count
 
-    def __cinit__(self, size_hint: int):
+    def __cinit__(self, size_hint: int, uses_mask: bool = False):
         self.count = 0
 
     def get_count(self) -> int:
@@ -84,7 +84,7 @@ cdef class ObjectFactorizer(Factorizer):
         PyObjectHashTable table
         ObjectVector uniques
 
-    def __cinit__(self, size_hint: int):
+    def __cinit__(self, size_hint: int, uses_mask: bool = False):
         self.table = PyObjectHashTable(size_hint)
         self.uniques = ObjectVector()
 
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index de36c353a1d17..704d2d83dd8a8 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -545,47 +545,6 @@ cdef class {{name}}HashTable(HashTable):
                     self.table.vals[k] = i
         self.na_position = na_position
 
-    @cython.boundscheck(False)
-    def is_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> bool:
-        cdef:
-            Py_ssize_t i, n = len(values)
-            int ret = 0
-            {{c_type}} val
-            khiter_t k
-            int8_t na_position = self.na_position
-
-        if self.uses_mask and mask is None:
-            raise NotImplementedError  # pragma: no cover
-
-        with nogil:
-            if self.uses_mask:
-                for i in range(n):
-                    if mask[i]:
-                        if na_position != -1:
-                            i = 0
-                            break
-                        na_position = i
-                    else:
-                        val = {{to_c_type}}(values[i])
-                        k = kh_put_{{dtype}}(self.table, val, &ret)
-                        self.table.vals[k] = i
-            else:
-                for i in range(n):
-                    val = {{to_c_type}}(values[i])
-                    k = kh_put_{{dtype}}(self.table, val, &ret)
-                    self.table.vals[k] = i
-
-                    if i % 10_000 == 0:
-                        if self.table.size != i + 1:
-                            i = 0
-                            break
-
-        self.na_position = na_position
-        if i == 0:
-            return False
-        else:
-            return True
-
     @cython.boundscheck(False)
     def inner_join_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
         cdef:
@@ -940,8 +899,8 @@ cdef class {{name}}Factorizer(Factorizer):
         {{name}}HashTable table
         {{name}}Vector uniques
 
-    def __cinit__(self, size_hint: int):
-        self.table = {{name}}HashTable(size_hint)
+    def __cinit__(self, size_hint: int, uses_mask: bool = False):
+        self.table = {{name}}HashTable(size_hint, uses_mask=uses_mask)
         self.uniques = {{name}}Vector()
 
     def factorize(self, const {{c_type}}[:] values, object mask=None,
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index cd6afdc2d84c0..f504910e32bf0 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1781,7 +1781,7 @@ def get_join_indexers_non_unique(
         Indexer into right.
     """
     lkey, rkey, count = _factorize_keys(
-        left, right, sort=sort, how=how, unique_merge=True
+        left, right, sort=sort, try_unique_merge=not sort and how == "inner"
     )
     if count == -1:
         # unique merge
@@ -2393,8 +2393,7 @@ def _factorize_keys(
     lk: ArrayLike,
     rk: ArrayLike,
     sort: bool = True,
-    how: str = "inner",
-    unique_merge: bool = False,
+    try_unique_merge: bool = False,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
     """
     Encode left and right keys as enumerated types.
@@ -2536,35 +2535,41 @@ def _factorize_keys(
 
     klass, lk, rk = _convert_arrays_and_get_rizer_klass(lk, rk)
 
-    rizer = klass(max(len(lk), len(rk)))
+    rizer = klass(
+        max(len(lk), len(rk)),
+        uses_mask=isinstance(rk, (BaseMaskedArray, ArrowExtensionArray)),
+    )
 
     if isinstance(lk, BaseMaskedArray):
         assert isinstance(rk, BaseMaskedArray)
-        lk_data = lk._data
-        lk_mask = lk._mask
-        rlab = rizer.factorize(rk._data, mask=rk._mask)
+        lk_data, lk_mask = lk._data, lk._mask
+        rk_data, rk_mask = rk._data, rk._mask
     elif isinstance(lk, ArrowExtensionArray):
         assert isinstance(rk, ArrowExtensionArray)
         # we can only get here with numeric dtypes
         # TODO: Remove when we have a Factorizer for Arrow
         lk_data = lk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype)
-        lk_mask = lk.isna()
-        rlab = rizer.factorize(
-            rk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=rk.isna()
-        )
+        rk_data = rk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype)
+        lk_mask, rk_mask = lk.isna(), rk.isna()
     else:
         # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
         # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
         # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
-        lk_data = lk
-        lk_mask = None
-        rlab = rizer.factorize(rk)  # type: ignore[arg-type]
-
-    if not sort and how == "inner" and rizer.get_count() == len(rlab) and unique_merge:
-        ridx, lidx = rizer.inner_join_unique(lk_data, lk_mask)
-        return lidx, ridx, -1
+        lk_data, rk_data = lk, rk
+        lk_mask, rk_mask = None, None
+
+    try_unique_merge = try_unique_merge and lk.dtype.kind in "iufb"
+    if try_unique_merge:
+        rlab = rizer.factorize(rk_data, mask=rk_mask)
+        if rizer.get_count() == len(rlab):
+            ridx, lidx = rizer.inner_join_unique(lk_data, lk_mask)
+            return lidx, ridx, -1
+        else:
+            llab = rizer.factorize(lk_data, mask=lk_mask)
+    else:
+        llab = rizer.factorize(lk_data, mask=lk_mask)
+        rlab = rizer.factorize(rk_data, mask=rk_mask)
 
-    llab = rizer.factorize(lk_data, mask=lk_mask)
     assert llab.dtype == np.dtype(np.intp), llab.dtype
     assert rlab.dtype == np.dtype(np.intp), rlab.dtype
 
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index f063f333ac889..ab3c5560da02b 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2924,21 +2924,21 @@ def test_merge_ea_int_and_float_numpy():
     df2 = DataFrame([1.5])
     expected = DataFrame(columns=[0], dtype="Int64")
 
-    with tm.assert_produces_warning(UserWarning, match="You are merging"):
-        result = df1.merge(df2)
-    tm.assert_frame_equal(result, expected)
-
-    with tm.assert_produces_warning(UserWarning, match="You are merging"):
-        result = df2.merge(df1)
-    tm.assert_frame_equal(result, expected.astype("float64"))
+    # with tm.assert_produces_warning(UserWarning, match="You are merging"):
+    #     result = df1.merge(df2)
+    # tm.assert_frame_equal(result, expected)
+    #
+    # with tm.assert_produces_warning(UserWarning, match="You are merging"):
+    #     result = df2.merge(df1)
+    # tm.assert_frame_equal(result, expected.astype("float64"))
 
     df2 = DataFrame([1.0])
     expected = DataFrame([1], columns=[0], dtype="Int64")
     result = df1.merge(df2)
     tm.assert_frame_equal(result, expected)
 
-    result = df2.merge(df1)
-    tm.assert_frame_equal(result, expected.astype("float64"))
+    # result = df2.merge(df1)
+    # tm.assert_frame_equal(result, expected.astype("float64"))
 
 
 def test_merge_arrow_string_index(any_string_dtype):

From 3b6b787e5bfde617e093aa17419cedeb03bb8b67 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 16:35:58 -0500
Subject: [PATCH 11/24] Fix bugs

---
 pandas/tests/reshape/merge/test_merge.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index ab3c5560da02b..f063f333ac889 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2924,21 +2924,21 @@ def test_merge_ea_int_and_float_numpy():
     df2 = DataFrame([1.5])
     expected = DataFrame(columns=[0], dtype="Int64")
 
-    # with tm.assert_produces_warning(UserWarning, match="You are merging"):
-    #     result = df1.merge(df2)
-    # tm.assert_frame_equal(result, expected)
-    #
-    # with tm.assert_produces_warning(UserWarning, match="You are merging"):
-    #     result = df2.merge(df1)
-    # tm.assert_frame_equal(result, expected.astype("float64"))
+    with tm.assert_produces_warning(UserWarning, match="You are merging"):
+        result = df1.merge(df2)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(UserWarning, match="You are merging"):
+        result = df2.merge(df1)
+    tm.assert_frame_equal(result, expected.astype("float64"))
 
     df2 = DataFrame([1.0])
     expected = DataFrame([1], columns=[0], dtype="Int64")
     result = df1.merge(df2)
     tm.assert_frame_equal(result, expected)
 
-    # result = df2.merge(df1)
-    # tm.assert_frame_equal(result, expected.astype("float64"))
+    result = df2.merge(df1)
+    tm.assert_frame_equal(result, expected.astype("float64"))
 
 
 def test_merge_arrow_string_index(any_string_dtype):

From 616dfc8bbbf41292f1664244478754047ae66cb8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 17:16:29 -0500
Subject: [PATCH 12/24] Rename method

---
 asv_bench/benchmarks/join_merge.py         | 95 ++++++++++++++++++++++
 pandas/_libs/hashtable.pyi                 |  6 ++
 pandas/_libs/hashtable_class_helper.pxi.in |  6 +-
 pandas/core/reshape/merge.py               |  4 +-
 4 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 03c0f92d766c6..a6c6990892d38 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -20,6 +20,101 @@
     from pandas import ordered_merge as merge_ordered
 
 
+class Concat:
+    params = [0, 1]
+    param_names = ["axis"]
+
+    def setup(self, axis):
+        N = 1000
+        s = Series(N, index=Index([f"i-{i}" for i in range(N)], dtype=object))
+        self.series = [s[i:-i] for i in range(1, 10)] * 50
+        self.small_frames = [DataFrame(np.random.randn(5, 4))] * 1000
+        df = DataFrame(
+            {"A": range(N)}, index=date_range("20130101", periods=N, freq="s")
+        )
+        self.empty_left = [DataFrame(), df]
+        self.empty_right = [df, DataFrame()]
+        self.mixed_ndims = [df, df.head(N // 2)]
+
+    def time_concat_series(self, axis):
+        concat(self.series, axis=axis, sort=False)
+
+    def time_concat_small_frames(self, axis):
+        concat(self.small_frames, axis=axis)
+
+    def time_concat_empty_right(self, axis):
+        concat(self.empty_right, axis=axis)
+
+    def time_concat_empty_left(self, axis):
+        concat(self.empty_left, axis=axis)
+
+    def time_concat_mixed_ndims(self, axis):
+        concat(self.mixed_ndims, axis=axis)
+
+
+class ConcatDataFrames:
+    params = ([0, 1], [True, False])
+    param_names = ["axis", "ignore_index"]
+
+    def setup(self, axis, ignore_index):
+        frame_c = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="C"))
+        self.frame_c = [frame_c] * 20
+        frame_f = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="F"))
+        self.frame_f = [frame_f] * 20
+
+    def time_c_ordered(self, axis, ignore_index):
+        concat(self.frame_c, axis=axis, ignore_index=ignore_index)
+
+    def time_f_ordered(self, axis, ignore_index):
+        concat(self.frame_f, axis=axis, ignore_index=ignore_index)
+
+
+class ConcatIndexDtype:
+    params = (
+        [
+            "datetime64[ns]",
+            "int64",
+            "Int64",
+            "int64[pyarrow]",
+            "string[python]",
+            "string[pyarrow]",
+        ],
+        ["monotonic", "non_monotonic", "has_na"],
+        [0, 1],
+        [True, False],
+    )
+    param_names = ["dtype", "structure", "axis", "sort"]
+
+    def setup(self, dtype, structure, axis, sort):
+        N = 10_000
+        if dtype == "datetime64[ns]":
+            vals = date_range("1970-01-01", periods=N)
+        elif dtype in ("int64", "Int64", "int64[pyarrow]"):
+            vals = np.arange(N, dtype=np.int64)
+        elif dtype in ("string[python]", "string[pyarrow]"):
+            vals = Index([f"i-{i}" for i in range(N)], dtype=object)
+        else:
+            raise NotImplementedError
+
+        idx = Index(vals, dtype=dtype)
+
+        if structure == "monotonic":
+            idx = idx.sort_values()
+        elif structure == "non_monotonic":
+            idx = idx[::-1]
+        elif structure == "has_na":
+            if not idx._can_hold_na:
+                raise NotImplementedError
+            idx = Index([None], dtype=dtype).append(idx)
+        else:
+            raise NotImplementedError
+
+        self.series = [Series(i, idx[:-i]) for i in range(1, 6)]
+
+    def time_concat_series(self, dtype, structure, axis, sort):
+        concat(self.series, axis=axis, sort=sort)
+
+
 class Join:
     params = [True, False]
     param_names = ["sort"]
diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi
index d3fd033935112..7a810a988e50e 100644
--- a/pandas/_libs/hashtable.pyi
+++ b/pandas/_libs/hashtable.pyi
@@ -25,6 +25,9 @@ class Factorizer:
         na_value=...,
         mask=...,
     ) -> npt.NDArray[np.intp]: ...
+    def hash_inner_join(
+        self, values: np.ndarray, mask=...
+    ) -> tuple[np.ndarray, np.ndarray]: ...
 
 class ObjectFactorizer(Factorizer):
     table: PyObjectHashTable
@@ -216,6 +219,9 @@ class HashTable:
         mask=...,
         ignore_na: bool = True,
     ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ...  # np.ndarray[subclass-specific]
+    def hash_inner_join(
+        self, values: np.ndarray, mask=...
+    ) -> tuple[np.ndarray, np.ndarray]: ...
 
 class Complex128HashTable(HashTable): ...
 class Complex64HashTable(HashTable): ...
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 704d2d83dd8a8..24b4690bdf12a 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -546,7 +546,7 @@ cdef class {{name}}HashTable(HashTable):
         self.na_position = na_position
 
     @cython.boundscheck(False)
-    def inner_join_unique(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
+    def hash_inner_join(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0, ctr = 0
@@ -931,8 +931,8 @@ cdef class {{name}}Factorizer(Factorizer):
         self.count = len(self.uniques)
         return labels
 
-    def inner_join_unique(self, const {{c_type}}[:] values, const uint8_t[:] mask = None) -> tuple[np.ndarray, np.ndarray]:
-        return self.table.inner_join_unique(values, mask)
+    def hash_inner_join(self, const {{c_type}}[:] values, const uint8_t[:] mask = None) -> tuple[np.ndarray, np.ndarray]:
+        return self.table.hash_inner_join(values, mask)
 
 {{endfor}}
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index f504910e32bf0..b1f03b0d60ba4 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -2555,14 +2555,14 @@ def _factorize_keys(
         # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
         # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
         # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
-        lk_data, rk_data = lk, rk
+        lk_data, rk_data = lk, rk  # type: ignore[assignment]
         lk_mask, rk_mask = None, None
 
     try_unique_merge = try_unique_merge and lk.dtype.kind in "iufb"
     if try_unique_merge:
         rlab = rizer.factorize(rk_data, mask=rk_mask)
         if rizer.get_count() == len(rlab):
-            ridx, lidx = rizer.inner_join_unique(lk_data, lk_mask)
+            ridx, lidx = rizer.hash_inner_join(lk_data, lk_mask)
             return lidx, ridx, -1
         else:
             llab = rizer.factorize(lk_data, mask=lk_mask)

From 28c7eb8a96b7d8fa8e0de11902d159b3e6238531 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 17:17:16 -0500
Subject: [PATCH 13/24] Rename keyword

---
 pandas/core/reshape/merge.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index b1f03b0d60ba4..a27b5f9929389 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1781,7 +1781,7 @@ def get_join_indexers_non_unique(
         Indexer into right.
     """
     lkey, rkey, count = _factorize_keys(
-        left, right, sort=sort, try_unique_merge=not sort and how == "inner"
+        left, right, sort=sort, hash_join_available=not sort and how == "inner"
     )
     if count == -1:
         # unique merge
@@ -2393,7 +2393,7 @@ def _factorize_keys(
     lk: ArrayLike,
     rk: ArrayLike,
     sort: bool = True,
-    try_unique_merge: bool = False,
+    hash_join_available: bool = False,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
     """
     Encode left and right keys as enumerated types.
@@ -2558,8 +2558,8 @@ def _factorize_keys(
         lk_data, rk_data = lk, rk  # type: ignore[assignment]
         lk_mask, rk_mask = None, None
 
-    try_unique_merge = try_unique_merge and lk.dtype.kind in "iufb"
-    if try_unique_merge:
+    hash_join_available = hash_join_available and lk.dtype.kind in "iufb"
+    if hash_join_available:
         rlab = rizer.factorize(rk_data, mask=rk_mask)
         if rizer.get_count() == len(rlab):
             ridx, lidx = rizer.hash_inner_join(lk_data, lk_mask)

From 4cb93655235c4c5d3cc41fa7a356c4762d5aca46 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 17:19:20 -0500
Subject: [PATCH 14/24] Add whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index cb211b0b72dce..da123cfaf3dcd 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -282,6 +282,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
+- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)

From f60d8b439d00c72416142cc226fdf0bae27f1d0e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 17:24:44 -0500
Subject: [PATCH 15/24] Clean up

---
 pandas/_libs/hashtable.pyx                 | 4 ++--
 pandas/_libs/hashtable_class_helper.pxi.in | 4 ++--
 pandas/core/reshape/merge.py               | 8 --------
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 139d86bd4f268..b4db762cb083b 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -75,7 +75,7 @@ cdef class Factorizer:
     def get_count(self) -> int:
         return self.count
 
-    def factorize(self, values, mask=None, na_sentinel=-1, na_value=None) -> np.ndarray:
+    def factorize(self, values, na_sentinel=-1, na_value=None, mask=None) -> np.ndarray:
         raise NotImplementedError
 
 
@@ -89,7 +89,7 @@ cdef class ObjectFactorizer(Factorizer):
         self.uniques = ObjectVector()
 
     def factorize(
-        self, ndarray[object] values, mask=None, na_sentinel=-1, na_value=None
+        self, ndarray[object] values, na_sentinel=-1, na_value=None, mask=None
     ) -> np.ndarray:
         """
 
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 24b4690bdf12a..2e9e2b7941995 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -903,8 +903,8 @@ cdef class {{name}}Factorizer(Factorizer):
         self.table = {{name}}HashTable(size_hint, uses_mask=uses_mask)
         self.uniques = {{name}}Vector()
 
-    def factorize(self, const {{c_type}}[:] values, object mask=None,
-                  na_sentinel=-1, na_value=None) -> np.ndarray:
+    def factorize(self, const {{c_type}}[:] values,
+                  na_sentinel=-1, na_value=None, object mask=None) -> np.ndarray:
         """
         Returns
         -------
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index a27b5f9929389..8ce884078cc86 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -2595,14 +2595,6 @@ def _factorize_keys(
     return llab, rlab, count
 
 
-def _convert_arrays_for_rizer(arr):
-    if isinstance(arr, BaseMaskedArray):
-        return arr._data, arr._mask
-    elif isinstance(arr, ArrowExtensionArray):
-        return arr.to_numpy(na_value=1, dtype=arr.dtype.numpy_dtype), arr.isna()
-    return arr, None
-
-
 def _convert_arrays_and_get_rizer_klass(
     lk: ArrayLike, rk: ArrayLike
 ) -> tuple[type[libhashtable.Factorizer], ArrayLike, ArrayLike]:

From 243f6c0c38159ddc570045977ba907b30106ffbe Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 22 Mar 2024 20:55:09 -0500
Subject: [PATCH 16/24] Fix typing

---
 pandas/_libs/hashtable.pyx | 3 +++
 scripts/run_stubtest.py    | 1 +
 2 files changed, 4 insertions(+)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index b4db762cb083b..c6b4b88a2dcf9 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -78,6 +78,9 @@ cdef class Factorizer:
     def factorize(self, values, na_sentinel=-1, na_value=None, mask=None) -> np.ndarray:
         raise NotImplementedError
 
+    def hash_inner_join(self, values, mask=None):
+        raise NotImplementedError
+
 
 cdef class ObjectFactorizer(Factorizer):
     cdef public:
diff --git a/scripts/run_stubtest.py b/scripts/run_stubtest.py
index 6307afa1bc822..df88c61061f12 100644
--- a/scripts/run_stubtest.py
+++ b/scripts/run_stubtest.py
@@ -44,6 +44,7 @@
     "pandas._libs.hashtable.HashTable.set_na",
     "pandas._libs.hashtable.HashTable.sizeof",
     "pandas._libs.hashtable.HashTable.unique",
+    "pandas._libs.hashtable.HashTable.hash_inner_join",
     # stubtest might be too sensitive
     "pandas._libs.lib.NoDefault",
     "pandas._libs.lib._NoDefault.no_default",

From 7dfbe159a25858e2d8a3fa35aad89edc3e8ceb0f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:02:47 -0500
Subject: [PATCH 17/24] Update

---
 pandas/_libs/hashtable_class_helper.pxi.in | 30 ++++++++++++++--------
 pandas/core/reshape/merge.py               | 13 +++++-----
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 2e9e2b7941995..a96d5a93884a8 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -545,17 +545,21 @@ cdef class {{name}}HashTable(HashTable):
                     self.table.vals[k] = i
         self.na_position = na_position
 
+    @cython.wraparound(False)
     @cython.boundscheck(False)
     def hash_inner_join(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> tuple[ndarray, ndarray]:
         cdef:
             Py_ssize_t i, n = len(values)
-            int ret = 0, ctr = 0
             {{c_type}} val
             khiter_t k
-            intp_t[::1] locs = np.empty(n, dtype=np.intp)
-            intp_t[::1] self_locs = np.empty(n, dtype=np.intp)
+            Int64Vector locs = Int64Vector(), self_locs = Int64Vector()
+            Int64VectorData *l
+            Int64VectorData *sl
             int8_t na_position = self.na_position
 
+        l = &locs.data
+        sl = &self_locs.data
+
         if self.uses_mask and mask is None:
             raise NotImplementedError  # pragma: no cover
 
@@ -564,18 +568,24 @@ cdef class {{name}}HashTable(HashTable):
                 if self.uses_mask and mask[i]:
                     if self.na_position == -1:
                         continue
-                    self_locs[ctr] = na_position
-                    locs[ctr] = na_position
-                    ctr += 1
+                    if needs_resize(l):
+                        with gil:
+                            locs.resize()
+                            self_locs.resize()
+                    append_data_int64(l, na_position)
+                    append_data_int64(sl, na_position)
                 else:
                     val = {{to_c_type}}(values[i])
                     k = kh_get_{{dtype}}(self.table, val)
                     if k != self.table.n_buckets:
-                        self_locs[ctr] = self.table.vals[k]
-                        locs[ctr] = i
-                        ctr += 1
+                        if needs_resize(l):
+                            with gil:
+                                locs.resize()
+                                self_locs.resize()
+                        append_data_int64(l, i)
+                        append_data_int64(sl, self.table.vals[k])
 
-        return np.asarray(self_locs)[:ctr], np.asarray(locs)[:ctr]
+        return self_locs.to_array(), locs.to_array()
 
     @cython.boundscheck(False)
     def lookup(self, const {{dtype}}_t[:] values, const uint8_t[:] mask = None) -> ndarray:
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 8ce884078cc86..c6a822a330200 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1780,11 +1780,9 @@ def get_join_indexers_non_unique(
     np.ndarray[np.intp]
         Indexer into right.
     """
-    lkey, rkey, count = _factorize_keys(
-        left, right, sort=sort, hash_join_available=not sort and how == "inner"
-    )
+    lkey, rkey, count = _factorize_keys(left, right, sort=sort, how=how)
     if count == -1:
-        # unique merge
+        # hash join
         return lkey, rkey
     if how == "left":
         lidx, ridx = libjoin.left_outer_join(lkey, rkey, count, sort=sort)
@@ -2393,7 +2391,7 @@ def _factorize_keys(
     lk: ArrayLike,
     rk: ArrayLike,
     sort: bool = True,
-    hash_join_available: bool = False,
+    how: str | None = None,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
     """
     Encode left and right keys as enumerated types.
@@ -2409,6 +2407,9 @@ def _factorize_keys(
     sort : bool, defaults to True
         If True, the encoding is done such that the unique elements in the
         keys are sorted.
+    how: str, optional
+        Used to determine if we can use hash-join. If not given, then just factorize
+        keys.
 
     Returns
     -------
@@ -2558,7 +2559,7 @@ def _factorize_keys(
         lk_data, rk_data = lk, rk  # type: ignore[assignment]
         lk_mask, rk_mask = None, None
 
-    hash_join_available = hash_join_available and lk.dtype.kind in "iufb"
+    hash_join_available = how == "inner" and not sort and lk.dtype.kind in "iufb"
     if hash_join_available:
         rlab = rizer.factorize(rk_data, mask=rk_mask)
         if rizer.get_count() == len(rlab):

From fc0ea082960a98b2ca4cb179db8463a2dd4e5483 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:04:55 -0500
Subject: [PATCH 18/24] Update docs

---
 pandas/core/reshape/merge.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index c6a822a330200..2cd065d03ff53 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -2418,7 +2418,8 @@ def _factorize_keys(
     np.ndarray[np.intp]
         Right (resp. left if called with `key='right'`) labels, as enumerated type.
     int
-        Number of unique elements in union of left and right labels.
+        Number of unique elements in union of left and right labels. -1 if we used
+        a hash-join.
 
     See Also
     --------

From 0bd0759ecf8162896b7faed9639bcfd9d9a00c5e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:15:32 -0500
Subject: [PATCH 19/24] Update

---
 pandas/_libs/hashtable_class_helper.pxi.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index a96d5a93884a8..08e9a22c64734 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -568,7 +568,7 @@ cdef class {{name}}HashTable(HashTable):
                 if self.uses_mask and mask[i]:
                     if self.na_position == -1:
                         continue
-                    if needs_resize(l):
+                    if needs_resize(l) or needs_resize(sl):
                         with gil:
                             locs.resize()
                             self_locs.resize()
@@ -578,7 +578,7 @@ cdef class {{name}}HashTable(HashTable):
                     val = {{to_c_type}}(values[i])
                     k = kh_get_{{dtype}}(self.table, val)
                     if k != self.table.n_buckets:
-                        if needs_resize(l):
+                        if needs_resize(l) or needs_resize(sl):
                             with gil:
                                 locs.resize()
                                 self_locs.resize()

From 97c4c26b6820839d9f706e1ac95572290e7bb86d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:18:59 -0500
Subject: [PATCH 20/24] Update

---
 pandas/_libs/hashtable_class_helper.pxi.in | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 08e9a22c64734..14ea251e63d1e 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -568,17 +568,17 @@ cdef class {{name}}HashTable(HashTable):
                 if self.uses_mask and mask[i]:
                     if self.na_position == -1:
                         continue
-                    if needs_resize(l) or needs_resize(sl):
+                    if needs_resize(l):
                         with gil:
                             locs.resize()
                             self_locs.resize()
-                    append_data_int64(l, na_position)
+                    append_data_int64(l, i)
                     append_data_int64(sl, na_position)
                 else:
                     val = {{to_c_type}}(values[i])
                     k = kh_get_{{dtype}}(self.table, val)
                     if k != self.table.n_buckets:
-                        if needs_resize(l) or needs_resize(sl):
+                        if needs_resize(l):
                             with gil:
                                 locs.resize()
                                 self_locs.resize()

From a0dd1d3b0048498efbf578a79ce92923c325a490 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:19:48 -0500
Subject: [PATCH 21/24] Update

---
 pandas/_libs/hashtable_class_helper.pxi.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 14ea251e63d1e..9ba722b0fa9ea 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -552,7 +552,8 @@ cdef class {{name}}HashTable(HashTable):
             Py_ssize_t i, n = len(values)
             {{c_type}} val
             khiter_t k
-            Int64Vector locs = Int64Vector(), self_locs = Int64Vector()
+            Int64Vector locs = Int64Vector()
+            Int64Vector self_locs = Int64Vector()
             Int64VectorData *l
             Int64VectorData *sl
             int8_t na_position = self.na_position

From de001a6380bb82dbf2175b684d19e1c502f43951 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:23:06 -0500
Subject: [PATCH 22/24] Update

---
 pandas/_libs/hashtable_class_helper.pxi.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 9ba722b0fa9ea..5c2e64d1aa617 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -569,7 +569,7 @@ cdef class {{name}}HashTable(HashTable):
                 if self.uses_mask and mask[i]:
                     if self.na_position == -1:
                         continue
-                    if needs_resize(l):
+                    if needs_resize(&locs.data):
                         with gil:
                             locs.resize()
                             self_locs.resize()
@@ -579,7 +579,7 @@ cdef class {{name}}HashTable(HashTable):
                     val = {{to_c_type}}(values[i])
                     k = kh_get_{{dtype}}(self.table, val)
                     if k != self.table.n_buckets:
-                        if needs_resize(l):
+                        if needs_resize(&locs.data):
                             with gil:
                                 locs.resize()
                                 self_locs.resize()

From 3bb06046c2873bd110b36559bf33be70963f002a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:26:35 -0500
Subject: [PATCH 23/24] Fixup for changed main

---
 pandas/_libs/hashtable_class_helper.pxi.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 7b8b5fdbcdfa7..e3799a8809ccb 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -581,7 +581,7 @@ cdef class {{name}}HashTable(HashTable):
                 if self.uses_mask and mask[i]:
                     if self.na_position == -1:
                         continue
-                    if needs_resize(&locs.data):
+                    if needs_resize(l.size, l.capacity):
                         with gil:
                             locs.resize()
                             self_locs.resize()
@@ -591,7 +591,7 @@ cdef class {{name}}HashTable(HashTable):
                     val = {{to_c_type}}(values[i])
                     k = kh_get_{{dtype}}(self.table, val)
                     if k != self.table.n_buckets:
-                        if needs_resize(&locs.data):
+                        if needs_resize(l.size, l.capacity):
                             with gil:
                                 locs.resize()
                                 self_locs.resize()

From 6ee2a4cdae8a1ecc7f3d70fa08171eecd6254d03 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 23 Mar 2024 14:34:23 -0500
Subject: [PATCH 24/24] Fixup for changed main

---
 pandas/_libs/hashtable_class_helper.pxi.in | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index e3799a8809ccb..e3a9102fec395 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -583,8 +583,8 @@ cdef class {{name}}HashTable(HashTable):
                         continue
                     if needs_resize(l.size, l.capacity):
                         with gil:
-                            locs.resize()
-                            self_locs.resize()
+                            locs.resize(locs.data.capacity * 4)
+                            self_locs.resize(locs.data.capacity * 4)
                     append_data_int64(l, i)
                     append_data_int64(sl, na_position)
                 else:
@@ -593,8 +593,8 @@ cdef class {{name}}HashTable(HashTable):
                     if k != self.table.n_buckets:
                         if needs_resize(l.size, l.capacity):
                             with gil:
-                                locs.resize()
-                                self_locs.resize()
+                                locs.resize(locs.data.capacity * 4)
+                                self_locs.resize(locs.data.capacity * 4)
                         append_data_int64(l, i)
                         append_data_int64(sl, self.table.vals[k])