diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index f246ebad3aa2c..bded5c1b644e9 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -847,6 +847,7 @@ Reshaping
 - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
 - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`)
 - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
+- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
 
 Build Changes
 ^^^^^^^^^^^^^
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index c4305136accb1..d0c7b66978661 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -23,6 +23,7 @@
     is_categorical_dtype,
     is_integer_dtype,
     is_float_dtype,
+    is_number,
     is_numeric_dtype,
     is_integer,
     is_int_or_datetime_dtype,
@@ -1356,8 +1357,14 @@ def _get_merge_keys(self):
                 if self.tolerance < 0:
                     raise MergeError("tolerance must be positive")
 
+            elif is_float_dtype(lt):
+                if not is_number(self.tolerance):
+                    raise MergeError(msg)
+                if self.tolerance < 0:
+                    raise MergeError("tolerance must be positive")
+
             else:
-                raise MergeError("key must be integer or timestamp")
+                raise MergeError("key must be integer, timestamp or float")
 
         # validate allow_exact_matches
         if not is_bool(self.allow_exact_matches):
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index d5df9d3820fdc..c75a6a707cafc 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -642,6 +642,21 @@ def test_tolerance_tz(self):
              'value2': list("BCDEE")})
         assert_frame_equal(result, expected)
 
+    def test_tolerance_float(self):
+        # GH22981
+        left = pd.DataFrame({'a': [1.1, 3.5, 10.9],
+                             'left_val': ['a', 'b', 'c']})
+        right = pd.DataFrame({'a': [1.0, 2.5, 3.3, 7.5, 11.5],
+                              'right_val': [1.0, 2.5, 3.3, 7.5, 11.5]})
+
+        expected = pd.DataFrame({'a': [1.1, 3.5, 10.9],
+                                 'left_val': ['a', 'b', 'c'],
+                                 'right_val': [1, 3.3, np.nan]})
+
+        result = pd.merge_asof(left, right, on='a', direction='nearest',
+                               tolerance=0.5)
+        assert_frame_equal(result, expected)
+
     def test_index_tolerance(self):
         # GH 15135
         expected = self.tolerance.set_index('time')