From f81c4ee66522d69739b51fc4d32265f56606f4b5 Mon Sep 17 00:00:00 2001
From: Nico Cernek <ncernek@gmail.com>
Date: Fri, 2 Aug 2019 14:12:31 -0600
Subject: [PATCH 01/11] add failing test to check row order preservation

correct the imports

broken commit with a bunch of print statements and comments

add test for left merge

swap left and right keys when how == "right"

correct old test: right-merge row order is now the same as the right df

clean up spacing and delete temp code

add whatsnew

replace .from_records with default constructor

add GH issue # to tests

revert commit ed54bec7e

change logic to swap left and right if how==right

clean formatting

rename vars and add comment for clarity

combine tests into one

update whatsnew

Update doc/source/whatsnew/v1.0.0.rst

Co-Authored-By: William Ayd <william.ayd@icloud.com>

add before and after examples

linting

cleanup

changes requested by jreback

update docs
---
 doc/source/whatsnew/v1.0.0.rst           |  5 ++
 pandas/core/reshape/merge.py             | 20 +++++--
 pandas/tests/reshape/merge/test_merge.py | 76 +++++++++++++++++++++---
 3 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 6597b764581a4..fa81edb4a3448 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1244,8 +1244,13 @@ Reshaping
 - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
 - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
 - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
+<<<<<<< HEAD
 - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`)
 - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
+- :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
+=======
+>>>>>>> 2b1b67592... changes requested by jreback
+-
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 49ac1b6cfa52b..898fa77a889ee 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -567,10 +567,10 @@ def __init__(
         indicator: bool = False,
         validate=None,
     ):
-        _left = _validate_operand(left)
-        _right = _validate_operand(right)
-        self.left = self.orig_left = _left
-        self.right = self.orig_right = _right
+        left = validate_operand(left)
+        right = validate_operand(right)
+        self.left = self.orig_left = left
+        self.right = self.orig_right = right
         self.how = how
         self.axis = axis
 
@@ -1292,6 +1292,9 @@ def _get_join_indexers(
         right_keys
     ), "left_key and right_keys must be the same length"
 
+    # bind `sort` arg. of _factorize_keys
+    fkeys = partial(_factorize_keys, sort=sort)
+
     # get left & right join labels and num. of levels at each location
     mapped = (
         _factorize_keys(left_keys[n], right_keys[n], sort=sort)
@@ -1306,15 +1309,20 @@ def _get_join_indexers(
     # factorize keys to a dense i8 space
     # `count` is the num. of unique keys
     # set(lkey) | set(rkey) == range(count)
-    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
 
+    # flip left and right keys if performing a right merge
+    # to preserve right merge row order (GH 27453)
+    if how == "right":
+        factorized_rkey, factorized_lkey, count = fkeys(rkey, lkey)
+    else:
+        factorized_lkey, factorized_rkey, count = fkeys(lkey, rkey)
     # preserve left frame order if how == 'left' and sort == False
     kwargs = copy.copy(kwargs)
     if how == "left":
         kwargs["sort"] = sort
     join_func = _join_functions[how]
 
-    return join_func(lkey, rkey, count, **kwargs)
+    return join_func(factorized_lkey, factorized_rkey, count, **kwargs)
 
 
 def _restore_dropped_levels_multijoin(
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index fd189c7435b29..53f18479d1729 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1286,17 +1286,17 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
         # GH 24212
         # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
         # -1 is interpreted as a missing value instead of the last element
-        df1 = pd.DataFrame({"a": [1, 2, 3], "key": [0, 2, 2]}, index=index)
-        df2 = pd.DataFrame({"b": [1, 2, 3, 4, 5]})
+        df1 = pd.DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
+        df2 = pd.DataFrame({"b": [0, 1, 2, 3, 4, 5]})
         result = df1.merge(df2, left_on="key", right_index=True, how=how)
         expected = pd.DataFrame(
             [
-                [1.0, 0, 1],
-                [2.0, 2, 3],
-                [3.0, 2, 3],
-                [np.nan, 1, 2],
-                [np.nan, 3, 4],
-                [np.nan, 4, 5],
+                [0, 0, 0],
+                [1, 1, 1],
+                [2, 2, 2],
+                [np.nan, 3, 3],
+                [np.nan, 4, 4],
+                [np.nan, 5, 5],
             ],
             columns=["a", "key", "b"],
         )
@@ -2167,3 +2167,63 @@ def test_merge_datetime_upcast_dtype():
         }
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("how", ["left", "right"])
+def test_merge_preserves_row_order(how):
+    # GH 27453
+    population = [
+        ("Jenn", "Jamaica", 3),
+        ("Beth", "Bulgaria", 7),
+        ("Carl", "Canada", 30),
+    ]
+    columns = ["name", "country", "population"]
+    population_df = DataFrame(population, columns=columns)
+
+    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
+    columns = ["name", "country"]
+    people_df = DataFrame(people, columns=columns)
+
+    expected_data = [
+        ("Abe", "America", np.nan),
+        ("Beth", "Bulgaria", 7),
+        ("Carl", "Canada", 30),
+    ]
+    expected_cols = ["name", "country", "population"]
+    expected = DataFrame(expected_data, columns=expected_cols)
+
+    result = pop.merge(ppl, on=("name", "country"), how="right")
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_left_merge_preserves_row_order():
+    # GH 27453
+    population = [
+        ("Jenn", "Jamaica", 3),
+        ("Beth", "Bulgaria", 7),
+        ("Carl", "Canada", 30),
+    ]
+    columns = ["name", "country", "population"]
+    pop = DataFrame(population, columns=columns)
+
+    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
+    columns = ["name", "country"]
+    ppl = DataFrame(people, columns=columns)
+
+    expected_data = [
+        ("Abe", "America", np.nan),
+        ("Beth", "Bulgaria", 7),
+        ("Carl", "Canada", 30),
+    ]
+    expected_cols = ["name", "country", "population"]
+    expected = DataFrame(expected_data, columns=expected_cols)
+
+    result = ppl.merge(pop, on=("name", "country"), how="left")
+    if how == "right":
+        left_df, right_df = population_df, people_df
+    elif how == "left":
+        left_df, right_df = people_df, population_df
+
+    result = left_df.merge(right_df, on=("name", "country"), how=how)
+    tm.assert_frame_equal(expected, result)

From 25f7e034b613b26ee5059e03e97600be2ff4fab6 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Fri, 24 Jan 2020 11:22:26 +0000
Subject: [PATCH 02/11] :twisted_rightwards_arrows: fix conflicts, fix
 nameerror in tests

---
 doc/source/whatsnew/v1.0.0.rst           |  4 ----
 pandas/core/reshape/merge.py             |  8 +++----
 pandas/tests/reshape/merge/test_merge.py | 28 ------------------------
 3 files changed, 4 insertions(+), 36 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index fa81edb4a3448..2a59a78c04aa2 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1244,13 +1244,9 @@ Reshaping
 - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
 - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
 - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
-<<<<<<< HEAD
 - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`)
 - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
 - :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
-=======
->>>>>>> 2b1b67592... changes requested by jreback
--
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 898fa77a889ee..46c591f423559 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -567,10 +567,10 @@ def __init__(
         indicator: bool = False,
         validate=None,
     ):
-        left = validate_operand(left)
-        right = validate_operand(right)
-        self.left = self.orig_left = left
-        self.right = self.orig_right = right
+        _left = _validate_operand(left)
+        _right = _validate_operand(right)
+        self.left = self.orig_left = _left
+        self.right = self.orig_right = _right
         self.how = how
         self.axis = axis
 
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 53f18479d1729..964277e8a1ac9 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2192,34 +2192,6 @@ def test_merge_preserves_row_order(how):
     expected_cols = ["name", "country", "population"]
     expected = DataFrame(expected_data, columns=expected_cols)
 
-    result = pop.merge(ppl, on=("name", "country"), how="right")
-
-    tm.assert_frame_equal(result, expected)
-
-
-def test_left_merge_preserves_row_order():
-    # GH 27453
-    population = [
-        ("Jenn", "Jamaica", 3),
-        ("Beth", "Bulgaria", 7),
-        ("Carl", "Canada", 30),
-    ]
-    columns = ["name", "country", "population"]
-    pop = DataFrame(population, columns=columns)
-
-    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
-    columns = ["name", "country"]
-    ppl = DataFrame(people, columns=columns)
-
-    expected_data = [
-        ("Abe", "America", np.nan),
-        ("Beth", "Bulgaria", 7),
-        ("Carl", "Canada", 30),
-    ]
-    expected_cols = ["name", "country", "population"]
-    expected = DataFrame(expected_data, columns=expected_cols)
-
-    result = ppl.merge(pop, on=("name", "country"), how="left")
     if how == "right":
         left_df, right_df = population_df, people_df
     elif how == "left":

From a53cc2216d38b9ab5999c06930c8c20452136609 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Thu, 30 Jan 2020 12:55:52 +0000
Subject: [PATCH 03/11] :pencil: add whatsnew entry to v1.0.1

---
 doc/source/whatsnew/v1.0.0.rst           |  1 -
 pandas/tests/reshape/merge/test_merge.py | 34 ++++++++++++------------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 2a59a78c04aa2..6597b764581a4 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1246,7 +1246,6 @@ Reshaping
 - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
 - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`)
 - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
-- :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 964277e8a1ac9..dbcdea1d454bb 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2172,25 +2172,25 @@ def test_merge_datetime_upcast_dtype():
 @pytest.mark.parametrize("how", ["left", "right"])
 def test_merge_preserves_row_order(how):
     # GH 27453
-    population = [
-        ("Jenn", "Jamaica", 3),
-        ("Beth", "Bulgaria", 7),
-        ("Carl", "Canada", 30),
-    ]
-    columns = ["name", "country", "population"]
-    population_df = DataFrame(population, columns=columns)
+    population_df = DataFrame(
+        {
+            "name": ["Jenn", "Beth", "Carl"],
+            "country": ["Jamaica", "Bulgaria", "Canada"],
+            "population": [3, 7, 30],
+        }
+    )
 
-    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
-    columns = ["name", "country"]
-    people_df = DataFrame(people, columns=columns)
+    people_df = DataFrame(
+        {"name": ["Abe", "Beth", "Carl"], "country": ["America", "Bulgaria", "Canada"]}
+    )
 
-    expected_data = [
-        ("Abe", "America", np.nan),
-        ("Beth", "Bulgaria", 7),
-        ("Carl", "Canada", 30),
-    ]
-    expected_cols = ["name", "country", "population"]
-    expected = DataFrame(expected_data, columns=expected_cols)
+    expected = DataFrame(
+        {
+            "name": ["Abe", "Beth", "Carl"],
+            "country": ["America", "Bulgaria", "Canada"],
+            "population": [np.nan, 7, 30],
+        }
+    )
 
     if how == "right":
         left_df, right_df = population_df, people_df

From 2d77a5c492c1e7712f7f5cc3199389bed2dc3fcf Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Thu, 13 Feb 2020 13:31:08 +0000
Subject: [PATCH 04/11] pass `how` to _factorize_keys

---
 pandas/core/reshape/merge.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 46c591f423559..924cb06011a46 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1292,12 +1292,9 @@ def _get_join_indexers(
         right_keys
     ), "left_key and right_keys must be the same length"
 
-    # bind `sort` arg. of _factorize_keys
-    fkeys = partial(_factorize_keys, sort=sort)
-
     # get left & right join labels and num. of levels at each location
     mapped = (
-        _factorize_keys(left_keys[n], right_keys[n], sort=sort)
+        _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how)
         for n in range(len(left_keys))
     )
     zipped = zip(*mapped)
@@ -1310,19 +1307,14 @@ def _get_join_indexers(
     # `count` is the num. of unique keys
     # set(lkey) | set(rkey) == range(count)
 
-    # flip left and right keys if performing a right merge
-    # to preserve right merge row order (GH 27453)
-    if how == "right":
-        factorized_rkey, factorized_lkey, count = fkeys(rkey, lkey)
-    else:
-        factorized_lkey, factorized_rkey, count = fkeys(lkey, rkey)
+    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how)
     # preserve left frame order if how == 'left' and sort == False
     kwargs = copy.copy(kwargs)
     if how == "left":
         kwargs["sort"] = sort
     join_func = _join_functions[how]
 
-    return join_func(factorized_lkey, factorized_rkey, count, **kwargs)
+    return join_func(lkey, rkey, count, **kwargs)
 
 
 def _restore_dropped_levels_multijoin(
@@ -1858,7 +1850,7 @@ def _right_outer_join(x, y, max_groups):
 }
 
 
-def _factorize_keys(lk, rk, sort=True):
+def _factorize_keys(lk, rk, sort=True, how="inner"):
     # Some pre-processing for non-ndarray lk / rk
     if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
         lk = getattr(lk, "_values", lk)._data
@@ -1927,6 +1919,8 @@ def _factorize_keys(lk, rk, sort=True):
             np.putmask(rlab, rmask, count)
         count += 1
 
+    if how == "right":
+        return rlab, llab, count
     return llab, rlab, count
 
 

From bab654ecc96c371ef7783395cc7fc1dea466288f Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Thu, 13 Feb 2020 13:58:50 +0000
Subject: [PATCH 05/11] Add tests with merging on index, using original OP's
 example as test

---
 doc/source/whatsnew/v1.1.0.rst           | 21 +++++++
 pandas/tests/reshape/merge/test_merge.py | 70 +++++++++++++-----------
 2 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 13827e8fc4c33..c039ff10422c5 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -67,6 +67,27 @@ Backwards incompatible API changes
 - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
 -
 
+:meth:`DataFrame.merge` preserves right frame's row order
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+:meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
+
+.. ipython:: python
+    left_df = pd.DataFrame({'animal': ['dog', 'pig'], 'max_speed': [40, 11]})
+    right_df = pd.DataFrame({'animal': ['quetzal', 'pig'], 'max_speed': [80, 11]})
+    left_df
+    right_df
+*pandas 1.0.x*
+
+.. code-block:: python
+    >>> left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
+        animal  max_speed
+    0      pig         11
+    1  quetzal         80
+*pandas 1.1.0*
+
+.. ipython:: python
+    left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
+
 .. ---------------------------------------------------------------------------
 
 .. _whatsnew_110.deprecations:
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index dbcdea1d454bb..cc9c450a6c1de 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1318,6 +1318,44 @@ def test_merge_right_index_right(self):
         result = left.merge(right, left_on="key", right_index=True, how="right")
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("how", ["left", "right"])
+    def test_merge_preserves_row_order(self, how):
+        # GH 27453
+        a = [2, 5, 3, 5]
+        df1 = pd.DataFrame({"A": a, "B": [8, 2, 4, 1]})
+        df2 = pd.DataFrame({"A": a, "B": [7, 1, 3, 0]})
+
+        result = df1.merge(df2[["A", "B"]], on=["A", "B"], how=how)
+        expected = pd.DataFrame({"A": a})
+        if how == "right":
+            expected["B"] = df2["B"]
+        else:
+            expected["B"] = df1["B"]
+        tm.assert_frame_equal(result, expected)
+
+        left_df = pd.DataFrame({"colors": ["blue", "red"]}, index=pd.Index([0, 1]))
+        right_df = pd.DataFrame({"hats": ["small", "big"]}, index=pd.Index([1, 0]))
+        result = left_df.merge(right_df, left_index=True, right_index=True, how=how)
+        if how == "right":
+            expected = pd.DataFrame(
+                {"colors": ["red", "blue"], "hats": ["small", "big"]}
+            )
+        else:
+            expected = pd.DataFrame(
+                {"colors": ["blue", "red"], "hats": ["big", "small"]}
+            )
+
+        left_df = pd.DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]})
+        right_df = pd.DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]})
+        result = left_df.merge(right_df, on=["animal", "max_speed"], how=how)
+        if how == "right":
+            expected = pd.DataFrame(
+                {"animal": ["quetzal", "pig"], "max_speed": [80, 11]}
+            )
+        else:
+            expected = pd.DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]})
+        tm.assert_frame_equal(result, expected)
+
     def test_merge_take_missing_values_from_index_of_other_dtype(self):
         # GH 24212
         left = pd.DataFrame(
@@ -2167,35 +2205,3 @@ def test_merge_datetime_upcast_dtype():
         }
     )
     tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("how", ["left", "right"])
-def test_merge_preserves_row_order(how):
-    # GH 27453
-    population_df = DataFrame(
-        {
-            "name": ["Jenn", "Beth", "Carl"],
-            "country": ["Jamaica", "Bulgaria", "Canada"],
-            "population": [3, 7, 30],
-        }
-    )
-
-    people_df = DataFrame(
-        {"name": ["Abe", "Beth", "Carl"], "country": ["America", "Bulgaria", "Canada"]}
-    )
-
-    expected = DataFrame(
-        {
-            "name": ["Abe", "Beth", "Carl"],
-            "country": ["America", "Bulgaria", "Canada"],
-            "population": [np.nan, 7, 30],
-        }
-    )
-
-    if how == "right":
-        left_df, right_df = population_df, people_df
-    elif how == "left":
-        left_df, right_df = people_df, population_df
-
-    result = left_df.merge(right_df, on=("name", "country"), how=how)
-    tm.assert_frame_equal(expected, result)

From 714f5b4c8aec32868d299dfca3fe665d4ab789b5 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Thu, 13 Feb 2020 14:30:31 +0000
Subject: [PATCH 06/11] fix whatsnew ipython directive

---
 doc/source/whatsnew/v1.1.0.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c039ff10422c5..2b3644955a494 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -72,20 +72,25 @@ Backwards incompatible API changes
 :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
 
 .. ipython:: python
+
     left_df = pd.DataFrame({'animal': ['dog', 'pig'], 'max_speed': [40, 11]})
     right_df = pd.DataFrame({'animal': ['quetzal', 'pig'], 'max_speed': [80, 11]})
     left_df
     right_df
+
 *pandas 1.0.x*
 
 .. code-block:: python
+
     >>> left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
         animal  max_speed
     0      pig         11
     1  quetzal         80
+
 *pandas 1.1.0*
 
 .. ipython:: python
+
     left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
 
 .. ---------------------------------------------------------------------------

From c83f46f585dad11295f998725e4dbf6fa0f9f663 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sun, 15 Mar 2020 15:03:44 +0000
Subject: [PATCH 07/11] Add docstring and types

---
 pandas/core/reshape/merge.py | 52 +++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index a9099eb81ec87..9418a55be41ee 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1822,7 +1822,57 @@ def _right_outer_join(x, y, max_groups):
     return left_indexer, right_indexer
 
 
-def _factorize_keys(lk, rk, sort=True, how="inner"):
+def _factorize_keys(lk, rk, sort=True, how="inner") -> Tuple[np.array, np.array, int]:
+    """
+    Encode left and right keys as enumerated types.
+
+    This is used to get the join indexers to be used when merging DataFrames.
+
+    Parameters
+    ----------
+    lk : array-like
+        Left key.
+    rk : array-like
+        Right key.
+    sort : bool, defaults to True
+        If True, the encoding is done such that the unique elements in the
+        keys are sorted.
+    how : {'left', 'right', 'outer', 'inner'}, default 'inner'
+        Type of merge.
+
+    Returns
+    -------
+    array
+        Left (resp. right if called with `how='right'`) labels, as enumerated type.
+    array
+        Right (resp. left if called with `how='right'`) labels, as enumerated type.
+    int
+        Number of unique elements in union of left and right labels.
+
+    See Also
+    --------
+    merge : Merge DataFrame or named Series objects
+        with a database-style join.
+    algorithms.factorize : Encode the object as an enumerated type
+        or categorical variable.
+
+    Examples
+    --------
+    >>> lk = np.array(["a", "c", "b"])
+    >>> rk = np.array(["a", "c"])
+
+    Here, the unique values are `'a', 'b', 'c'`. With the default
+    `sort=True`, the encoding will be `{0: 'a', 1: 'b', 2: 'c'}`:
+
+    >>> pd.core.reshape.merge._factorize_keys(lk, rk)
+    (array([0, 2, 1]), array([0, 2]), 3)
+
+    With `sort=False`, the encoding will correspond to the order
+    in which the unique elements first appear: `{0: 'a', 1: 'c', 2: 'b'}`:
+
+    >>> pd.core.reshape.merge._factorize_keys(lk, rk, sort=False)
+    (array([0, 1, 2]), array([0, 1]), 3)
+    """
     # Some pre-processing for non-ndarray lk / rk
     lk = extract_array(lk, extract_numpy=True)
     rk = extract_array(rk, extract_numpy=True)

From 65c32261945d1ef2936588fef5b2ab8dbe915ce7 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sun, 15 Mar 2020 15:19:31 +0000
Subject: [PATCH 08/11] remove unnecessary test, remove test that doesn't fail
 on master

---
 pandas/tests/reshape/merge/test_merge.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 8ca63819ecb31..a6a76a1078667 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1321,30 +1321,6 @@ def test_merge_right_index_right(self):
     @pytest.mark.parametrize("how", ["left", "right"])
     def test_merge_preserves_row_order(self, how):
         # GH 27453
-        a = [2, 5, 3, 5]
-        df1 = pd.DataFrame({"A": a, "B": [8, 2, 4, 1]})
-        df2 = pd.DataFrame({"A": a, "B": [7, 1, 3, 0]})
-
-        result = df1.merge(df2[["A", "B"]], on=["A", "B"], how=how)
-        expected = pd.DataFrame({"A": a})
-        if how == "right":
-            expected["B"] = df2["B"]
-        else:
-            expected["B"] = df1["B"]
-        tm.assert_frame_equal(result, expected)
-
-        left_df = pd.DataFrame({"colors": ["blue", "red"]}, index=pd.Index([0, 1]))
-        right_df = pd.DataFrame({"hats": ["small", "big"]}, index=pd.Index([1, 0]))
-        result = left_df.merge(right_df, left_index=True, right_index=True, how=how)
-        if how == "right":
-            expected = pd.DataFrame(
-                {"colors": ["red", "blue"], "hats": ["small", "big"]}
-            )
-        else:
-            expected = pd.DataFrame(
-                {"colors": ["blue", "red"], "hats": ["big", "small"]}
-            )
-
         left_df = pd.DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]})
         right_df = pd.DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]})
         result = left_df.merge(right_df, on=["animal", "max_speed"], how=how)

From 0e2c529730503783aeeaaefc3c634f09ea8e5e74 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sun, 15 Mar 2020 18:01:45 +0000
Subject: [PATCH 09/11] Add docstring

---
 pandas/core/reshape/merge.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 9418a55be41ee..df3e70ea2316d 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1822,7 +1822,9 @@ def _right_outer_join(x, y, max_groups):
     return left_indexer, right_indexer
 
 
-def _factorize_keys(lk, rk, sort=True, how="inner") -> Tuple[np.array, np.array, int]:
+def _factorize_keys(
+    lk, rk, sort: bool = True, how: str = "inner"
+) -> Tuple[np.array, np.array, int]:
     """
     Encode left and right keys as enumerated types.
 
@@ -1884,7 +1886,7 @@ def _factorize_keys(lk, rk, sort=True, how="inner") -> Tuple[np.array, np.array,
         rk, _ = rk._values_for_factorize()
 
     elif (
-        is_categorical_dtype(lk) and is_categorical_dtype(rk) and lk.is_dtype_equal(rk)
+        is_categorical_dtype(lk) and is_categorical_dtype(rk) and is_dtype_equal(lk, rk)
     ):
         if lk.categories.equals(rk.categories):
             # if we exactly match in categories, allow us to factorize on codes

From 711d37c5fba8d5b0f9fc25a3ffce300e2e60f23d Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 25 Mar 2020 23:54:13 +0000
Subject: [PATCH 10/11] use cast in categorical case

---
 pandas/core/reshape/merge.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4400de1c945bd..6e024560ea2e4 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -6,14 +6,14 @@
 import datetime
 from functools import partial
 import string
-from typing import TYPE_CHECKING, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional, Tuple, Union, cast
 import warnings
 
 import numpy as np
 
 from pandas._libs import Timedelta, hashtable as libhashtable, lib
 import pandas._libs.join as libjoin
-from pandas._typing import FrameOrSeries
+from pandas._typing import ArrayLike, FrameOrSeries
 from pandas.errors import MergeError
 from pandas.util._decorators import Appender, Substitution
 
@@ -24,6 +24,7 @@
     is_array_like,
     is_bool,
     is_bool_dtype,
+    is_categorical,
     is_categorical_dtype,
     is_datetime64tz_dtype,
     is_dtype_equal,
@@ -1823,7 +1824,7 @@ def _right_outer_join(x, y, max_groups):
 
 
 def _factorize_keys(
-    lk, rk, sort: bool = True, how: str = "inner"
+    lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner"
 ) -> Tuple[np.array, np.array, int]:
     """
     Encode left and right keys as enumerated types.
@@ -1888,6 +1889,9 @@ def _factorize_keys(
     elif (
         is_categorical_dtype(lk) and is_categorical_dtype(rk) and is_dtype_equal(lk, rk)
     ):
+        assert is_categorical(lk) and is_categorical(rk)
+        lk = cast(Categorical, lk)
+        rk = cast(Categorical, rk)
         if lk.categories.equals(rk.categories):
             # if we exactly match in categories, allow us to factorize on codes
             rk = rk.codes

From aaf542eba45ea4091f4fe93cab3a7aaadf0b8f5a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Thu, 26 Mar 2020 12:40:42 +0000
Subject: [PATCH 11/11] reword titles in whatsnew (v1.0.x -> previous behavior)

---
 doc/source/whatsnew/v1.1.0.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a0119d15b3715..e0d60e56796dd 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -179,7 +179,7 @@ key and type of :class:`Index`.  These now consistently raise ``KeyError`` (:iss
     left_df
     right_df
 
-*pandas 1.0.x*
+*Previous behavior*:
 
 .. code-block:: python
 
@@ -188,7 +188,7 @@ key and type of :class:`Index`.  These now consistently raise ``KeyError`` (:iss
     0      pig         11
     1  quetzal         80
 
-*pandas 1.1.0*
+*New behavior*:
 
 .. ipython:: python