diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 731406394ed46..9416fcb3f9d48 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -477,6 +477,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) +- Bug in ``pd.merge`` on Windows where merging DataFrames with ``np.intc`` or ``np.uintc`` data types caused unexpected behavior or errors (:issue:`58713`) - Sparse diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e6e84c2135b82..9edf38d66a790 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -130,6 +130,9 @@ if np.intc is not np.int32: _factorizers[np.intc] = libhashtable.Int64Factorizer +if np.uintc(0).itemsize == 8: + _factorizers[np.uintc] = libhashtable.UInt64Factorizer + _known = (np.ndarray, ExtensionArray, Index, ABCSeries) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5c5c06dea0008..de8f087cab0cf 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1523,6 +1523,41 @@ def test_join_multi_dtypes(self, any_int_numpy_dtype, d2): expected.sort_values(["k1", "k2"], kind="mergesort", inplace=True) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "d1", [np.int64, np.int32, np.intc, np.int16, np.int8, np.uint8, np.uintc] + ) + @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16]) + def test_join_multi_dtypes_with_uintc(self, d1, d2): + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame( + { + "k1": np.array([0, 1, 2] * 8, dtype=dtype1), + "k2": ["foo", "bar"] * 12, + "v": np.array(np.arange(24), dtype=np.int64), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=["k1", "k2"]) + + expected = left.copy() + + if dtype2.kind == "i": + dtype2 = np.dtype("float64") + expected["v2"] = np.array(np.nan, dtype=dtype2) + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=["k1", "k2"], sort=True) + expected.sort_values(["k1", "k2"], kind="mergesort", inplace=True) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "int_vals, float_vals, exp_vals", [