From 180536a03bbeefdabacb2a7db2a53215bb39d2c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 17 May 2023 18:45:32 +0200
Subject: [PATCH] Backport PR #53175: Add `np.intc` to `_factorizers` in `pd.merge`

---
 doc/source/whatsnew/v2.0.2.rst           | 2 +-
 pandas/core/reshape/merge.py             | 4 ++++
 pandas/tests/reshape/merge/test_merge.py | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
index f3432564233a1..50313a8ca4796 100644
--- a/doc/source/whatsnew/v2.0.2.rst
+++ b/doc/source/whatsnew/v2.0.2.rst
@@ -14,11 +14,11 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed performance regression in :meth:`GroupBy.apply` (:issue:`53195`)
+- Fixed regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`)
 - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`)
 - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`)
 - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`)
 - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_202.bug_fixes:
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index a0207d492023f..096b005d99263 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -123,6 +123,10 @@
     np.object_: libhashtable.ObjectFactorizer,
 }
 
+# See https://github.com/pandas-dev/pandas/issues/52451
+if np.intc is not np.int32:
+    _factorizers[np.intc] = libhashtable.Int64Factorizer
+
 
 @Substitution("\nleft : DataFrame or named Series")
 @Appender(_merge_doc, indents=0)
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index a4d1bfbaa34be..ee9ceddeaad97 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1464,7 +1464,9 @@ def test_different(self, right_vals):
         result = merge(left, right, on="A")
         assert is_object_dtype(result.A.dtype)
 
-    @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8])
+    @pytest.mark.parametrize(
+        "d1", [np.int64, np.int32, np.intc, np.int16, np.int8, np.uint8]
+    )
     @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16])
     def test_join_multi_dtypes(self, d1, d2):
         dtype1 = np.dtype(d1)