Skip to content

Commit f81c4ee

Browse files
Nico CernekMarco Gorelli
Nico Cernek
authored and
Marco Gorelli
committed
add failing test to check row order preservation
correct the imports broken commit with a bunch of print statements and comments add test for left merge swap left and right keys when how == "right" correct old test: right-merge row order is now the same as the right df clean up spacing and delete temp code add whatsnew replace .from_records with default constructor add GH issue # to tests revert commit ed54bec change logic to swap left and right if how==right clean formatting rename vars and add comment for clarity combine tests into one update whatsnew Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: William Ayd <william.ayd@icloud.com> add before and after examples linting cleanup changes requested by jreback update docs
1 parent 56cc7f4 commit f81c4ee

File tree

3 files changed

+87
-14
lines changed

3 files changed

+87
-14
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,8 +1244,13 @@ Reshaping
12441244
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
12451245
- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
12461246
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
12471248
- Improved error message and docstring in :func:`cut` and :func:`qcut` when ``labels=True`` (:issue:`13318`)
12481249
- Bug in missing ``fill_na`` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
1250+
- :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
12491254

12501255
Sparse
12511256
^^^^^^

pandas/core/reshape/merge.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -567,10 +567,10 @@ def __init__(
567567
indicator: bool = False,
568568
validate=None,
569569
):
570-
_left = _validate_operand(left)
571-
_right = _validate_operand(right)
572-
self.left = self.orig_left = _left
573-
self.right = self.orig_right = _right
570+
left = validate_operand(left)
571+
right = validate_operand(right)
572+
self.left = self.orig_left = left
573+
self.right = self.orig_right = right
574574
self.how = how
575575
self.axis = axis
576576

@@ -1292,6 +1292,9 @@ def _get_join_indexers(
12921292
right_keys
12931293
), "left_key and right_keys must be the same length"
12941294

1295+
# bind `sort` arg. of _factorize_keys
1296+
fkeys = partial(_factorize_keys, sort=sort)
1297+
12951298
# get left & right join labels and num. of levels at each location
12961299
mapped = (
12971300
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
@@ -1306,15 +1309,20 @@ def _get_join_indexers(
13061309
# factorize keys to a dense i8 space
13071310
# `count` is the num. of unique keys
13081311
# set(lkey) | set(rkey) == range(count)
1309-
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
13101312

1313+
# flip left and right keys if performing a right merge
1314+
# to preserve right merge row order (GH 27453)
1315+
if how == "right":
1316+
factorized_rkey, factorized_lkey, count = fkeys(rkey, lkey)
1317+
else:
1318+
factorized_lkey, factorized_rkey, count = fkeys(lkey, rkey)
13111319
# preserve left frame order if how == 'left' and sort == False
13121320
kwargs = copy.copy(kwargs)
13131321
if how == "left":
13141322
kwargs["sort"] = sort
13151323
join_func = _join_functions[how]
13161324

1317-
return join_func(lkey, rkey, count, **kwargs)
1325+
return join_func(factorized_lkey, factorized_rkey, count, **kwargs)
13181326

13191327

13201328
def _restore_dropped_levels_multijoin(

pandas/tests/reshape/merge/test_merge.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,17 +1286,17 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
12861286
# GH 24212
12871287
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
12881288
# -1 is interpreted as a missing value instead of the last element
1289-
df1 = pd.DataFrame({"a": [1, 2, 3], "key": [0, 2, 2]}, index=index)
1290-
df2 = pd.DataFrame({"b": [1, 2, 3, 4, 5]})
1289+
df1 = pd.DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
1290+
df2 = pd.DataFrame({"b": [0, 1, 2, 3, 4, 5]})
12911291
result = df1.merge(df2, left_on="key", right_index=True, how=how)
12921292
expected = pd.DataFrame(
12931293
[
1294-
[1.0, 0, 1],
1295-
[2.0, 2, 3],
1296-
[3.0, 2, 3],
1297-
[np.nan, 1, 2],
1298-
[np.nan, 3, 4],
1299-
[np.nan, 4, 5],
1294+
[0, 0, 0],
1295+
[1, 1, 1],
1296+
[2, 2, 2],
1297+
[np.nan, 3, 3],
1298+
[np.nan, 4, 4],
1299+
[np.nan, 5, 5],
13001300
],
13011301
columns=["a", "key", "b"],
13021302
)
@@ -2167,3 +2167,63 @@ def test_merge_datetime_upcast_dtype():
21672167
}
21682168
)
21692169
tm.assert_frame_equal(result, expected)
2170+
2171+
2172+
@pytest.mark.parametrize("how", ["left", "right"])
def test_merge_preserves_row_order(how):
    """Check that a left/right merge keeps the row order of the driving frame.

    For ``how="left"`` the people frame is the left operand; for
    ``how="right"`` it is the right operand.  In both cases the merged
    result must list the rows in the order they appear in the people frame.
    """
    # GH 27453
    population = [
        ("Jenn", "Jamaica", 3),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    population_df = DataFrame(population, columns=["name", "country", "population"])

    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
    people_df = DataFrame(people, columns=["name", "country"])

    # "Abe" has no population entry, so that cell is NaN after the merge.
    expected_data = [
        ("Abe", "America", np.nan),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    expected = DataFrame(expected_data, columns=["name", "country", "population"])

    # Place the people frame on the side whose row order must be preserved.
    if how == "right":
        left_df, right_df = population_df, people_df
    else:
        left_df, right_df = people_df, population_df

    # A list (not a tuple) is used for `on` so the keys cannot be mistaken
    # for a single tuple-valued column label.
    result = left_df.merge(right_df, on=["name", "country"], how=how)

    tm.assert_frame_equal(result, expected)
2198+
2199+
2200+
def test_left_merge_preserves_row_order():
    """Check that a left merge returns rows in the left frame's order."""
    # GH 27453
    population = [
        ("Jenn", "Jamaica", 3),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    pop = DataFrame(population, columns=["name", "country", "population"])

    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
    ppl = DataFrame(people, columns=["name", "country"])

    # "Abe" has no population entry, so that cell is NaN after the merge.
    expected_data = [
        ("Abe", "America", np.nan),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    expected = DataFrame(expected_data, columns=["name", "country", "population"])

    # A list (not a tuple) is used for `on` so the keys cannot be mistaken
    # for a single tuple-valued column label.
    result = ppl.merge(pop, on=["name", "country"], how="left")

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)