diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3053625721560..16f0b9ee99909 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -972,6 +972,7 @@ Reshaping - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue`22796`) +- Bug in :func:`merge_asof` where a confusing error message was raised when attempting to merge with missing values (:issue:`23189`) .. _whatsnew_0240.bug_fixes.sparse: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d0c7b66978661..88b1ec7e47bbb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -36,7 +36,7 @@ ensure_float64, ensure_object, _get_dtype) -from pandas.core.dtypes.missing import na_value_for_dtype +from pandas.core.dtypes.missing import na_value_for_dtype, isnull from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util._decorators import Appender, Substitution @@ -1390,12 +1390,21 @@ def flip(xs): self.right_join_keys[-1]) tolerance = self.tolerance - # we required sortedness in the join keys - msg = "{side} keys must be sorted" + # we required sortedness and non-missingness in the join keys + msg_sorted = "{side} keys must be sorted" + msg_missings = "Merge keys contain null values on {side} side" + if not Index(left_values).is_monotonic: - raise ValueError(msg.format(side='left')) + if isnull(left_values).sum() > 0: + raise ValueError(msg_missings.format(side='left')) + else: + raise ValueError(msg_sorted.format(side='left')) + if not Index(right_values).is_monotonic: - raise ValueError(msg.format(side='right')) + if isnull(right_values).sum() > 0: + raise 
ValueError(msg_missings.format(side='right')) + else: + raise ValueError(msg_sorted.format(side='right')) # initial type conversion as needed if needs_i8_conversion(left_values): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index c75a6a707cafc..ba0cdda61a12c 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1007,3 +1007,25 @@ def test_merge_datatype_error(self): with tm.assert_raises_regex(MergeError, msg): merge_asof(left, right, on='a') + + def test_merge_on_nans_int(self): + # 23189 + msg = "Merge keys contain null values on left side" + left = pd.DataFrame({'a': [1.0, 5.0, 10.0, 12.0, np.nan], + 'left_val': ['a', 'b', 'c', 'd', 'e']}) + right = pd.DataFrame({'a': [1.0, 5.0, 10.0, 12.0], + 'right_val': [1, 6, 11, 15]}) + + with tm.assert_raises_regex(ValueError, msg): + merge_asof(left, right, on='a') + + def test_merge_on_nans_datetime(self): + # 23189 + msg = "Merge keys contain null values on right side" + left = pd.DataFrame({"a": pd.date_range('20130101', periods=5)}) + date_vals = pd.date_range('20130102', periods=5)\ + .append(pd.Index([None])) + right = pd.DataFrame({"a": date_vals}) + + with tm.assert_raises_regex(ValueError, msg): + merge_asof(left, right, on='a')