diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index b62e307b1f2a8..88b25f160cdfd 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -209,23 +209,26 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
             if name in result:
                 key_col = result[name]
 
-                if name in self.left and left_indexer is not None:
-                    na_indexer = (left_indexer == -1).nonzero()[0]
-                    if len(na_indexer) == 0:
-                        continue
-
-                    right_na_indexer = right_indexer.take(na_indexer)
-                    key_col.put(
-                        na_indexer, com.take_1d(self.right_join_keys[i],
-                                                right_na_indexer))
-                elif name in self.right and right_indexer is not None:
-                    na_indexer = (right_indexer == -1).nonzero()[0]
-                    if len(na_indexer) == 0:
-                        continue
-
-                    left_na_indexer = left_indexer.take(na_indexer)
-                    key_col.put(na_indexer, com.take_1d(self.left_join_keys[i],
-                                                        left_na_indexer))
+                if left_indexer is not None and right_indexer is not None:
+
+                    if name in self.left:
+                        na_indexer = (left_indexer == -1).nonzero()[0]
+                        if len(na_indexer) == 0:
+                            continue
+
+                        right_na_indexer = right_indexer.take(na_indexer)
+                        key_col.put(
+                            na_indexer, com.take_1d(self.right_join_keys[i],
+                                                    right_na_indexer))
+                    elif name in self.right:
+                        na_indexer = (right_indexer == -1).nonzero()[0]
+                        if len(na_indexer) == 0:
+                            continue
+
+                        left_na_indexer = left_indexer.take(na_indexer)
+                        key_col.put(na_indexer, com.take_1d(self.left_join_keys[i],
+                                                            left_na_indexer))
+
             elif left_indexer is not None:
                 if name is None:
                     name = 'key_%d' % i
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index d1c4710c16aad..d7abcadcb3778 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -717,6 +717,24 @@ def test_merge_nosort(self):
 
         self.assert_((df.var3.unique() == result.var3.unique()).all())
 
+    def test_merge_nan_right(self):
+        df1 = DataFrame({"i1" : [0, 1], "i2" : [0, 1]})
+        df2 = DataFrame({"i1" : [0], "i3" : [0]})
+        result = df1.join(df2, on="i1", rsuffix="_")
+        expected = DataFrame({'i1': {0: 0.0, 1: 1}, 'i2': {0: 0, 1: 1},
+                              'i1_': {0: 0, 1: np.nan}, 'i3': {0: 0.0, 1: np.nan},
+                              None: {0: 0, 1: 0}}).set_index(None).reset_index()[['i1', 'i2', 'i1_', 'i3']]
+        assert_frame_equal(result, expected, check_dtype=False)
+
+        df1 = DataFrame({"i1" : [0, 1], "i2" : [0.5, 1.5]})
+        df2 = DataFrame({"i1" : [0], "i3" : [0.7]})
+        result = df1.join(df2, rsuffix="_", on='i1')
+        expected = DataFrame({'i1': {0: 0, 1: 1}, 'i1_': {0: 0.0, 1: nan},
+                              'i2': {0: 0.5, 1: 1.5}, 'i3': {0: 0.69999999999999996,
+                              1: nan}})[['i1', 'i2', 'i1_', 'i3']]
+        assert_frame_equal(result, expected)
+
+
     def test_overlapping_columns_error_message(self):
         # #2649
         df = DataFrame({'key': [1, 2, 3],