From 49f864a707307c438c405f394e0a4bc5e96322af Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 May 2020 20:28:53 +0200 Subject: [PATCH 01/11] BUG: Merge crashed when left_on is in index, right_index is True and result is empty with inner --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 17a830788be3f..33d31fc2803cc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -936,6 +936,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) +- Fixed bug in :func:`merge` where an error was raised when performing an ``inner`` join with ``left_on`` column in Index and ``right_index`` when result was empty (:issue:`33814`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0c796c8f45a52..205e465561442 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -837,7 +837,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): # make sure to just use the right values mask = left_indexer == -1 if mask.all(): - key_col = rvals + key_col = Index(rvals) else: key_col = Index(lvals).where(~mask, rvals) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4408aa0bbce4a..172600ae1caa0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2227,3 +2227,10 @@ def test_categorical_non_unique_monotonic(n_categories): index=left_index, ) tm.assert_frame_equal(expected, result) + + +def test_merge_empty_right_index_left_on(): + left = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) + right = pd.DataFrame({"b": [1]}).set_index(["b"]) + result = pd.merge(left, right, left_on=["b"], right_index=True) + assert result.empty From e4da7c5abe9673799b9bfc2c0a383aead84ceaf3 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 May 2020 20:31:47 +0200 Subject: [PATCH 02/11] Change whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 33d31fc2803cc..c1775c5688037 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -936,7 +936,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) -- Fixed bug in :func:`merge` where an error was raised when performing an ``inner`` join with ``left_on`` column in Index and ``right_index`` when result was empty (:issue:`33814`) +- Fixed bug in :func:`merge` where an error was raised when performing an ``inner`` join with partial index and ``right_index`` when result was empty (:issue:`33814`) Sparse ^^^^^^ From 1440bce502d4445854b1411844e29646e94d1fc4 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 May 2020 23:19:17 +0200 Subject: [PATCH 03/11] Change whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c1775c5688037..047e5089aec2a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -936,7 +936,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) -- Fixed bug in :func:`merge` where an error was raised when performing an ``inner`` join with partial index and ``right_index`` when result was empty (:issue:`33814`) +- Fixed bug in :func:`merge` where an error was raised when performing an `inner` join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) Sparse ^^^^^^ From c0da36e136228705cce87e41bf4817a30a2c53b5 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 May 2020 02:02:07 +0200 Subject: [PATCH 04/11] Parametrize test --- pandas/tests/reshape/merge/test_merge.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 172600ae1caa0..afebace2bb53b 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2229,8 +2229,19 @@ def test_categorical_non_unique_monotonic(n_categories): tm.assert_frame_equal(expected, result) -def test_merge_empty_right_index_left_on(): - left = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) - right = pd.DataFrame({"b": [1]}).set_index(["b"]) - result = pd.merge(left, right, left_on=["b"], right_index=True) - assert result.empty +@pytest.mark.parametrize( + ("kwargs", "args"), + [ + ({"left_on": ["b"], "right_index": True}, 1), + ({"left_index": True, "right_on": ["b"]}, -1), + ], +) +def test_merge_empty_right_index_left_on(kwargs, args): + # 33814 + df1 = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) + df2 = pd.DataFrame({"b": [1]}).set_index(["b"]) + result = pd.merge(*[df1, df2][::args], **kwargs) + expected = pd.DataFrame( + index=MultiIndex(levels=[[int()], [int()]], codes=[[], []], names=["a", "b"]) + ) + tm.assert_frame_equal(result, expected) From b9b47c55e6f37dc7d596ab102b48f062fa4d34bc Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 May 2020 02:02:29 +0200 Subject: [PATCH 05/11] Add issue number --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index afebace2bb53b..e6b4ccf587a86 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2237,7 +2237,7 @@ def test_categorical_non_unique_monotonic(n_categories): ], ) def test_merge_empty_right_index_left_on(kwargs, args): - # 33814 + # GH 33814 df1 = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) df2 = pd.DataFrame({"b": [1]}).set_index(["b"]) result = pd.merge(*[df1, df2][::args], **kwargs) From c32928d7e74ff808c9320819b9679ed1eea426d8 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 May 2020 02:04:33 +0200 Subject: [PATCH 06/11] Fix whatsnew issue --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 047e5089aec2a..b2c3c85e26f93 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -936,7 +936,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) -- Fixed bug in :func:`merge` where an error was raised when performing an `inner` join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) +- Fixed bug in :func:`merge` where an error was raised when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) Sparse ^^^^^^ From 783af3ae810b68499a369976926fb36c45a3ed02 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 1 Jun 2020 01:12:43 +0200 Subject: [PATCH 07/11] Change test, simplify --- pandas/tests/reshape/merge/test_merge.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index e6b4ccf587a86..c6a4dce8ff99e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2229,19 +2229,13 @@ def test_categorical_non_unique_monotonic(n_categories): tm.assert_frame_equal(expected, result) -@pytest.mark.parametrize( - ("kwargs", "args"), - [ - ({"left_on": ["b"], "right_index": True}, 1), - ({"left_index": True, "right_on": ["b"]}, -1), - ], -) -def test_merge_empty_right_index_left_on(kwargs, args): +def test_merge_empty_right_index_left_on(): # GH 33814 df1 = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) df2 = pd.DataFrame({"b": [1]}).set_index(["b"]) - result = pd.merge(*[df1, df2][::args], **kwargs) - expected = pd.DataFrame( - index=MultiIndex(levels=[[int()], [int()]], codes=[[], []], names=["a", "b"]) - ) + expected = pd.DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"]) + result = pd.merge(df1, df2, left_on=["b"], right_index=True) + tm.assert_frame_equal(result, expected) + + result = pd.merge(df2, df1, left_index=True, right_on=["b"]) tm.assert_frame_equal(result, expected) From af000eafbdbd479161e5110c237c74af8bc1f01a Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Sep 2020 21:33:24 +0200 Subject: [PATCH 08/11] Move whats new note --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b1229a5d5823d..40bd38c8da7c3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -322,6 +322,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) - Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) +- Fixed bug in :func:`merge` where an error was raised when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) - Sparse From 28557c67cd9b6efe9682292baec96094cba912f4 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 2 Jan 2021 00:50:35 +0100 Subject: [PATCH 09/11] Move whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9d1b3eaebdf8b..968db9de97093 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -293,6 +293,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ +- Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) - From efc6eb13acdc8a384008a33e63765fbdb746c7fc Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 2 Jan 2021 00:58:12 +0100 Subject: [PATCH 10/11] Remove pd --- pandas/tests/reshape/merge/test_merge.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 991c8602e3636..88aa4cedc4928 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2379,11 +2379,11 @@ def test_merge_right_left_index(): def test_merge_empty_right_index_left_on(): # GH#33814 - df1 = pd.DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) - df2 = pd.DataFrame({"b": [1]}).set_index(["b"]) - expected = pd.DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"]) - result = pd.merge(df1, df2, left_on=["b"], right_index=True) + df1 = DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) + df2 = DataFrame({"b": [1]}).set_index(["b"]) + expected = DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"]) + result = merge(df1, df2, left_on=["b"], right_index=True) tm.assert_frame_equal(result, expected) - result = pd.merge(df2, df1, left_index=True, right_on=["b"]) + result = merge(df2, df1, left_index=True, right_on=["b"]) tm.assert_frame_equal(result, expected) From d5d6c8ee00d8cff89bfbcc0cf6335ab2ff21d78b Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 3 Jan 2021 19:07:04 +0100 Subject: [PATCH 11/11] Rename test --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 88aa4cedc4928..da3ac81c4aa17 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2377,7 +2377,7 @@ def test_merge_right_left_index(): tm.assert_frame_equal(result, expected) -def test_merge_empty_right_index_left_on(): +def test_merge_result_empty_index_and_on(): # GH#33814 df1 = DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) df2 = DataFrame({"b": [1]}).set_index(["b"])