From 15df8bbaf32b345b67ebd739902a0fe0f0d8453a Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 20:54:48 +0200 Subject: [PATCH 1/8] Allow all int types for merge --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 910c7ea561929..993ed942eac25 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1641,7 +1641,7 @@ def _get_merge_keys(self): if self.tolerance < Timedelta(0): raise MergeError("tolerance must be positive") - elif is_int64_dtype(lt): + elif is_integer_dtype(lt): if not is_integer(self.tolerance): raise MergeError(msg) if self.tolerance < 0: From 2fdeb9c5dc03e21b28f646fa3047e0a44853e4a0 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 21:50:59 +0200 Subject: [PATCH 2/8] Removed now unused is_int64_dtype --- pandas/core/reshape/merge.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 993ed942eac25..7bfc8153da568 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -28,7 +28,6 @@ is_dtype_equal, is_extension_array_dtype, is_float_dtype, - is_int64_dtype, is_integer, is_integer_dtype, is_list_like, From 6dbfa6279322a0c12c05243b84ccdc25c6defb25 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 22:45:02 +0200 Subject: [PATCH 3/8] Test case for int32 type when using tolerance --- pandas/tests/reshape/merge/test_merge_asof.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index caf2539a9e150..31939d4698d97 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1287,3 +1287,19 @@ def test_timedelta_tolerance_nearest(self): ) assert_frame_equal(result, expected) + + def test_int_type_tolerance(self): + # GH #28870 + + left = pd.DataFrame({'ts_int': [0, 100, 200], 'left_val': [1, 2, 3]}) + right = pd.DataFrame({'ts_int': [50, 150, 250], 'right_val': [1, 2, 3]}) + left['ts_int'] = left['ts_int'].astype(np.int32) + right['ts_int'] = right['ts_int'].astype(np.int32) + + expected = pd.DataFrame( + {"ts_int": [0, 100, 200], "left_val": [1, 2, 3], + "right_val": [np.nan, 1.0, 2.0]} + ) + + result = pd.merge_asof(left, right, on='ts_int', tolerance=100) + assert_frame_equal(result, expected) From 2ed2dd1be9de758dc32ef98bc01aba7ea9ad4ea2 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 22:57:59 +0200 Subject: [PATCH 4/8] Fixed expected type of 'on' col --- pandas/tests/reshape/merge/test_merge_asof.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 31939d4698d97..f6eacc8e5e05f 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1291,15 +1291,16 @@ def test_timedelta_tolerance_nearest(self): def test_int_type_tolerance(self): # GH #28870 - left = pd.DataFrame({'ts_int': [0, 100, 200], 'left_val': [1, 2, 3]}) - right = pd.DataFrame({'ts_int': [50, 150, 250], 'right_val': [1, 2, 3]}) - left['ts_int'] = left['ts_int'].astype(np.int32) - right['ts_int'] = right['ts_int'].astype(np.int32) + left = pd.DataFrame({'a': [0, 100, 200], 'left_val': [1, 2, 3]}) + right = pd.DataFrame({'a': [50, 150, 250], 'right_val': [1, 2, 3]}) + left['a'] = left['a'].astype(np.int32) + right['a'] = right['a'].astype(np.int32) expected = pd.DataFrame( - {"ts_int": [0, 100, 200], "left_val": [1, 2, 3], + {"a": [0, 100, 200], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} ) + expected['a'] = expected['a'].astype(np.int32) - result = pd.merge_asof(left, right, on='ts_int', tolerance=100) + result = pd.merge_asof(left, right, on='a', tolerance=100) assert_frame_equal(result, expected) From bbd5a931f37078d7b8afdea6795473fa6b7e6d2f Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 23:00:58 +0200 Subject: [PATCH 5/8] Reformatted test case using black --- pandas/tests/reshape/merge/test_merge_asof.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index f6eacc8e5e05f..206380c5ea46b 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1291,16 +1291,15 @@ def test_timedelta_tolerance_nearest(self): def test_int_type_tolerance(self): # GH #28870 - left = pd.DataFrame({'a': [0, 100, 200], 'left_val': [1, 2, 3]}) - right = pd.DataFrame({'a': [50, 150, 250], 'right_val': [1, 2, 3]}) - left['a'] = left['a'].astype(np.int32) - right['a'] = right['a'].astype(np.int32) + left = pd.DataFrame({"a": [0, 100, 200], "left_val": [1, 2, 3]}) + right = pd.DataFrame({"a": [50, 150, 250], "right_val": [1, 2, 3]}) + left["a"] = left["a"].astype(np.int32) + right["a"] = right["a"].astype(np.int32) expected = pd.DataFrame( - {"a": [0, 100, 200], "left_val": [1, 2, 3], - "right_val": [np.nan, 1.0, 2.0]} + {"a": [0, 100, 200], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} ) - expected['a'] = expected['a'].astype(np.int32) + expected["a"] = expected["a"].astype(np.int32) - result = pd.merge_asof(left, right, on='a', tolerance=100) + result = pd.merge_asof(left, right, on="a", tolerance=100) assert_frame_equal(result, expected) From e4bfb18e1d211557a53a8bbf5926217984204fe2 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Wed, 9 Oct 2019 23:09:39 +0200 Subject: [PATCH 6/8] Whatsnew: int dtype tolerance fix in merge_asof --- doc/source/whatsnew/v0.25.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 9789c9fce3541..8ce67866002aa 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -86,7 +86,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Fix to ensure all int dtypes can be used in :meth:`pandas.core.reshape.merge_asof` when using a tolerance value (:issue:`28870`). - - - From d4df46094aeea40db2f9ff84bb5fa13c5b898b74 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Thu, 10 Oct 2019 01:45:29 +0200 Subject: [PATCH 7/8] TST: Parametrized int type w/ tolerance (GH28870) --- doc/source/whatsnew/v0.25.2.rst | 2 +- pandas/tests/reshape/merge/test_merge_asof.py | 20 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 8ce67866002aa..0d5529f83726f 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -86,7 +86,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- Fix to ensure all int dtypes can be used in :meth:`pandas.core.reshape.merge_asof` when using a tolerance value (:issue:`28870`). +- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Before every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). - - - diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 206380c5ea46b..84ca6d369a967 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1288,18 +1288,22 @@ def test_timedelta_tolerance_nearest(self): assert_frame_equal(result, expected) - def test_int_type_tolerance(self): + @pytest.mark.parametrize( + "dtype", + [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], + ) + def test_int_type_tolerance(self, dtype): # GH #28870 - left = pd.DataFrame({"a": [0, 100, 200], "left_val": [1, 2, 3]}) - right = pd.DataFrame({"a": [50, 150, 250], "right_val": [1, 2, 3]}) - left["a"] = left["a"].astype(np.int32) - right["a"] = right["a"].astype(np.int32) + left = pd.DataFrame({"a": [0, 10, 20], "left_val": [1, 2, 3]}) + right = pd.DataFrame({"a": [5, 15, 25], "right_val": [1, 2, 3]}) + left["a"] = left["a"].astype(dtype) + right["a"] = right["a"].astype(dtype) expected = pd.DataFrame( - {"a": [0, 100, 200], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} + {"a": [0, 10, 20], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} ) - expected["a"] = expected["a"].astype(np.int32) + expected["a"] = expected["a"].astype(dtype) - result = pd.merge_asof(left, right, on="a", tolerance=100) + result = pd.merge_asof(left, right, on="a", tolerance=10) assert_frame_equal(result, expected) From 3f194fcbcf680ad35deaf81c4b60a75a7d8e0ffa Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Thu, 10 Oct 2019 15:32:14 +0200 Subject: [PATCH 8/8] TST: any int dtype for tolerance merge (GH28870) --- doc/source/whatsnew/v0.25.2.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/tests/reshape/merge/test_merge_asof.py | 12 ++++-------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 0d5529f83726f..9789c9fce3541 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -86,7 +86,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Before every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). +- - - - diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fd1c1271a5e37..dd96c6b594cea 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -344,6 +344,7 @@ Reshaping - Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) +- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). Sparse ^^^^^^ diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 84ca6d369a967..2e9ae80323159 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1288,22 +1288,18 @@ def test_timedelta_tolerance_nearest(self): assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "dtype", - [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], - ) - def test_int_type_tolerance(self, dtype): + def test_int_type_tolerance(self, any_int_dtype): # GH #28870 left = pd.DataFrame({"a": [0, 10, 20], "left_val": [1, 2, 3]}) right = pd.DataFrame({"a": [5, 15, 25], "right_val": [1, 2, 3]}) - left["a"] = left["a"].astype(dtype) - right["a"] = right["a"].astype(dtype) + left["a"] = left["a"].astype(any_int_dtype) + right["a"] = right["a"].astype(any_int_dtype) expected = pd.DataFrame( {"a": [0, 10, 20], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} ) - expected["a"] = expected["a"].astype(dtype) + expected["a"] = expected["a"].astype(any_int_dtype) result = pd.merge_asof(left, right, on="a", tolerance=10) assert_frame_equal(result, expected)