From 64ab35416e42a276a30f72a80301c83022bef904 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 May 2021 20:16:37 -0700 Subject: [PATCH 1/3] TST: split pd.merge test with indicator=True --- pandas/tests/reshape/merge/test_merge.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index edd100219143c..d3e13d40c9ff2 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -944,13 +944,11 @@ def test_merge_on_periods(self): assert result["value_x"].dtype == "Period[D]" assert result["value_y"].dtype == "Period[D]" - def test_indicator(self): - # PR #10054. xref #7412 and closes #8790. + @pytest.fixture + def dfs_for_indicator(self): df1 = DataFrame( {"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]} ) - df1_copy = df1.copy() - df2 = DataFrame( { "col1": [1, 2, 3, 4, 5], @@ -958,6 +956,13 @@ def test_indicator(self): "col_right": [2, 2, 2, 2, 2], } ) + return df1, df2 + + def test_indicator(self, dfs_for_indicator): + # PR #10054. xref #7412 and closes #8790. + df1, df2 = dfs_for_indicator + df1_copy = df1.copy() + df2_copy = df2.copy() df_result = DataFrame( @@ -1016,14 +1021,19 @@ def test_indicator(self): ) tm.assert_frame_equal(test_custom_name, df_result_custom_name) + def test_merge_indicator_arg_validation(self, dfs_for_indicator): # Check only accepts strings and booleans + df1, df2 = dfs_for_indicator + msg = "indicator option can only accept boolean or string arguments" with pytest.raises(ValueError, match=msg): merge(df1, df2, on="col1", how="outer", indicator=5) with pytest.raises(ValueError, match=msg): df1.merge(df2, on="col1", how="outer", indicator=5) + def test_merge_indicator_result_integrity(self, dfs_for_indicator): # Check result integrity + df1, df2 = dfs_for_indicator test2 = merge(df1, df2, on="col1", how="left", indicator=True) assert (test2._merge != "right_only").all() @@ -1040,7 +1050,10 @@ def test_indicator(self): test4 = df1.merge(df2, on="col1", how="inner", indicator=True) assert (test4._merge == "both").all() + def test_merge_indicator_invalid(self, dfs_for_indicator): # Check if working name in df + df1, _ = dfs_for_indicator + for i in ["_right_indicator", "_left_indicator", "_merge"]: df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]}) @@ -1071,6 +1084,7 @@ def test_indicator(self): df_badcolumn, on="col1", how="outer", indicator="custom_column_name" ) + def test_merge_indicator_multiple_columns(self): # Merge on multiple columns df3 = DataFrame({"col1": [0, 1], "col2": ["a", "b"]}) From 1136d52d723f01b139e86b556953247e8d692722 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 07:31:27 -0700 Subject: [PATCH 2/3] TST: split big tests in test_merge --- pandas/tests/reshape/merge/test_merge.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index d3e13d40c9ff2..1156e2a3196ac 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -543,6 +543,7 @@ def check2(exp, kwarg): result = merge(left, right, how="outer", **kwarg) tm.assert_frame_equal(result, exp) + # TODO: should the next loop be un-indented? doing so breaks this test for kwarg in [ {"left_index": True, "right_index": True}, {"left_index": True, "right_on": "x"}, @@ -652,6 +653,7 @@ def test_merge_nan_right(self): ) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_merge_nan_right2(self): df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]}) df2 = DataFrame({"i1": [0], "i3": [0.7]}) result = df1.join(df2, rsuffix="_", on="i1") @@ -695,6 +697,9 @@ def test_join_append_timedeltas(self, using_array_manager): expected = expected.astype(object) tm.assert_frame_equal(result, expected) + def test_join_append_timedeltas2(self): + # timedelta64 issues with join/merge + # GH 5695 td = np.timedelta64(300000000) lhs = DataFrame(Series([td, td], index=["A", "B"])) rhs = DataFrame(Series([td], index=["A"])) @@ -806,6 +811,7 @@ def test_merge_on_datetime64tz(self): result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) + def test_merge_datetime64tz_values(self): left = DataFrame( { "key": [1, 2], @@ -923,6 +929,7 @@ def test_merge_on_periods(self): result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) + def test_merge_period_values(self): left = DataFrame( {"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")} ) @@ -1552,6 +1559,8 @@ def test_merge_incompat_infer_boolean_object(self): result = merge(df2, df1, on="key") tm.assert_frame_equal(result, expected) + def test_merge_incompat_infer_boolean_object_with_missing(self): + # GH21119: bool + object bool merge OK # with missing value df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)}) df2 = DataFrame({"key": [True, False]}) From 48ea15b68ff00ef60d617ddb5e883c1f18d25f99 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 10:59:16 -0700 Subject: [PATCH 3/3] move fixture to top --- pandas/tests/reshape/merge/test_merge.py | 27 ++++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 1156e2a3196ac..77b155f01a2ea 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -102,6 +102,19 @@ def series_of_dtype_all_na(request): return request.param +@pytest.fixture +def dfs_for_indicator(): + df1 = DataFrame({"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]}) + df2 = DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col_conflict": [1, 2, 3, 4, 5], + "col_right": [2, 2, 2, 2, 2], + } + ) + return df1, df2 + + class TestMerge: def setup_method(self, method): # aggregate multiple columns @@ -951,20 +964,6 @@ def test_merge_period_values(self): assert result["value_x"].dtype == "Period[D]" assert result["value_y"].dtype == "Period[D]" - @pytest.fixture - def dfs_for_indicator(self): - df1 = DataFrame( - {"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]} - ) - df2 = DataFrame( - { - "col1": [1, 2, 3, 4, 5], - "col_conflict": [1, 2, 3, 4, 5], - "col_right": [2, 2, 2, 2, 2], - } - ) - return df1, df2 - def test_indicator(self, dfs_for_indicator): # PR #10054. xref #7412 and closes #8790. df1, df2 = dfs_for_indicator