pandas-dev · TomAugspurger · Nov 15, 2018 · Sep 19, 2018 · Sep 19, 2018 · Oct 7, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -164,6 +164,40 @@ array, but rather an ``ExtensionArray``:
 This is the same behavior as ``Series.values`` for categorical data. See
 :ref:`whatsnew_0240.api_breaking.interval_values` for more.
 
+.. _whatsnew_0240.enhancements.join_with_two_multiindexes:
+
+Joining with two multi-indexes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As of pandas 0.24.0, :func:`DataFrame.join` can be used to join multi-indexed ``DataFrame`` instances on the overlapping index levels (:issue:`20356`).
+
+See the :ref:`Merge, join, and concatenate
+<merging.Join_with_two_multi_indexes>` documentation section.
+
+.. ipython:: python
+
+   index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
+                                      ('K1', 'X2')],
+                                       names=['key', 'X'])
+   left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
+                        'B': ['B0', 'B1', 'B2']},
+                         index=index_left)
+
+   index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
+                                    ('K2', 'Y2'), ('K2', 'Y3')],
+                                     names=['key', 'Y'])
+   right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
+                         'D': ['D0', 'D1', 'D2', 'D3']},
+                         index=index_right)
+
+   left.join(right)
+
+For earlier versions this can be done using the following.
+
+.. ipython:: python
+
+   pd.merge(left.reset_index(), right.reset_index(),
+            on=['key'], how='inner').set_index(['key','X','Y'])
 
 .. _whatsnew_0240.enhancements.other:
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -3124,8 +3124,8 @@ def get_value(self, series, key):
                 iloc = self.get_loc(key)
                 return s[iloc]
             except KeyError:
-                if (len(self) > 0
-                        and (self.holds_integer() or self.is_boolean())):
+                if (len(self) > 0 and
+                        (self.holds_integer() or self.is_boolean())):
                     raise
                 elif is_integer(key):
                     return s[key]
@@ -3912,46 +3912,69 @@ def join(self, other, how='left', level=None, return_indexers=False,
 
     def _join_multi(self, other, how, return_indexers=True):
+        """Join self and other on the index level names they share."""
         from .multi import MultiIndex
-        self_is_mi = isinstance(self, MultiIndex)
-        other_is_mi = isinstance(other, MultiIndex)
+        from pandas.core.reshape.merge import _complete_multilevel_join
 
         # figure out join names
-        self_names = com._not_none(*self.names)
-        other_names = com._not_none(*other.names)
-        overlap = list(set(self_names) & set(other_names))
+        self_names = set(com._not_none(*self.names))
+        other_names = set(com._not_none(*other.names))
+        overlap = list(self_names & other_names)
 
-        # need at least 1 in common, but not more than 1
+        # need at least 1 in common
         if not len(overlap):
-            raise ValueError("cannot join with no level specified and no "
-                             "overlapping names")
-        if len(overlap) > 1:
-            raise NotImplementedError("merging with more than one level "
-                                      "overlap on a multi-index is not "
-                                      "implemented")
-        jl = overlap[0]
+            raise ValueError("cannot join with no overlapping index names")
+
+        self_is_mi = isinstance(self, MultiIndex)
+        other_is_mi = isinstance(other, MultiIndex)
+
+        if self_is_mi and other_is_mi:
+            # Drop the non matching levels
+            ldrop_levels = list(set(self_names) - set(overlap))
+            rdrop_levels = list(set(other_names) - set(overlap))
+
+            self_jnlevels = self.droplevel(ldrop_levels)
+            other_jnlevels = other.droplevel(rdrop_levels)
+
+            if not (self_jnlevels.is_unique and other_jnlevels.is_unique):
+                raise ValueError("Join on level between two MultiIndex objects"
+                                 " is ambiguous")
+
+            dropped_levels = ldrop_levels + rdrop_levels
+            join_idx, lidx, ridx = self_jnlevels.join(other_jnlevels, how,
+                                                      return_indexers=True)
 
+            levels, labels, names = _complete_multilevel_join(self, other, how,
+                                                              dropped_levels,
+                                                              join_idx,
+                                                              lidx, ridx)
+
+            multi_join_idx = MultiIndex(levels=levels, labels=labels,
+                                        names=names, verify_integrity=False)
+            multi_join_idx = multi_join_idx.remove_unused_levels()
+            # honour return_indexers, like the _join_level path below
+            if return_indexers:
+                return multi_join_idx, lidx, ridx
+            return multi_join_idx
+
+        jl = overlap[0]
+
+        # Case where only one index is multi
         # make the indices into mi's that match
-        if not (self_is_mi and other_is_mi):
-
-            flip_order = False
-            if self_is_mi:
-                self, other = other, self
-                flip_order = True
-                # flip if join method is right or left
-                how = {'right': 'left', 'left': 'right'}.get(how, how)
-
-            level = other.names.index(jl)
-            result = self._join_level(other, level, how=how,
-                                      return_indexers=return_indexers)
-
-            if flip_order:
-                if isinstance(result, tuple):
-                    return result[0], result[2], result[1]
-            return result
+        flip_order = False
+        if self_is_mi:
+            self, other = other, self
+            flip_order = True
+            # flip if join method is right or left
+            how = {'right': 'left', 'left': 'right'}.get(how, how)
+
+        level = other.names.index(jl)
+        result = self._join_level(other, level, how=how,
+                                  return_indexers=return_indexers)
 
-        # 2 multi-indexes
-        raise NotImplementedError("merging with both multi-indexes is not "
-                                  "implemented")
+        if flip_order:
+            if isinstance(result, tuple):
+                return result[0], result[2], result[1]
+        return result
 
     def _join_non_unique(self, other, how='left', return_indexers=False):
         from pandas.core.reshape.merge import _get_join_indexers

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -813,8 +813,11 @@ def _get_merge_keys(self):
 
         left, right = self.left, self.right
 
-        is_lkey = lambda x: is_array_like(x) and len(x) == len(left)
-        is_rkey = lambda x: is_array_like(x) and len(x) == len(right)
+        def is_lkey(x):
+            return is_array_like(x) and len(x) == len(left)
+
+        def is_rkey(x):
+            return is_array_like(x) and len(x) == len(right)
 
         # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
         # user could, for example, request 'left_index' and 'left_by'. In a
@@ -968,11 +971,11 @@ def _maybe_coerce_merge_keys(self):
 
             # boolean values are considered as numeric, but are still allowed
             # to be merged on object boolean values
-            elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk))
-                    and not is_numeric_dtype(rk)):
+            elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) and not
+                    is_numeric_dtype(rk)):
                 raise ValueError(msg)
-            elif (not is_numeric_dtype(lk)
-                    and (is_numeric_dtype(rk) and not is_bool_dtype(rk))):
+            elif (not is_numeric_dtype(lk) and
+                  (is_numeric_dtype(rk) and not is_bool_dtype(rk))):
                 raise ValueError(msg)
             elif is_datetimelike(lk) and not is_datetimelike(rk):
                 raise ValueError(msg)
@@ -1138,6 +1141,82 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner',
     return join_func(lkey, rkey, count, **kwargs)
 
 
+def _complete_multilevel_join(left, right, how, dropped_levels,
+                              join_idx, lidx, ridx):
+    """
+    *this is an internal non-public method*
+
+    Return the levels, labels and names of a multilevel to multilevel join.
+    The levels listed in ``dropped_levels`` are appended to those of
+    ``join_idx``; ``lidx`` and ``ridx`` give, for each row of the join, the
+    position in ``left`` / ``right`` it was taken from (-1 where no match).
+
+    Parameters
+    ----------
+    left : MultiIndex
+        left index
+    right : MultiIndex
+        right index
+    how : {'left', 'right', 'outer', 'inner'}
+        type of join; accepted for symmetry with the caller but not read here
+    dropped_levels : list of str
+        names of the non-common levels to restore
+    join_idx : MultiIndex
+        the index of the join between the common levels of left and right
+    lidx : intp array or None
+        left indexer; None means every row of left, in order
+    ridx : intp array or None
+        right indexer; None means every row of right, in order
+
+    Returns
+    -------
+    levels : list-like of arrays
+        levels of the joined index followed by the restored levels
+    labels : list-like of int sequences
+        labels of the joined index followed by the restored labels
+    names : list-like of str
+        names of the joined index followed by the restored names
+    """
+
+    join_levels = join_idx.levels
+    join_labels = join_idx.labels
+    join_names = join_idx.names
+
+    # lidx and ridx hold the positions where the join occurred
+    # for left and right respectively. If lidx (ridx) is None it means that
+    # the join occurred on all indices of left (right)
+    if lidx is None:
+        lidx = range(0, len(left))
+
+    if ridx is None:
+        ridx = range(0, len(right))
+
+    # Iterate through the levels that must be restored
+    for dl in dropped_levels:
+        if dl in left.names:
+            idx = left
+            indexer = lidx
+        else:
+            idx = right
+            indexer = ridx
+
+        # The index of the level name to be restored
+        name_idx = idx.names.index(dl)
+
+        restore_levels = idx.levels[name_idx].values
+        restore_labels = idx.labels[name_idx]
+
+        join_levels = join_levels + [restore_levels]
+        join_names = join_names + [dl]
+
+        # Inject -1 in the labels list where a join was not possible
+        # IOW indexer[i]=-1
+        labels = [restore_labels[i] if i != -1 else -1 for i in indexer]
+        join_labels = join_labels + [labels]
+
+    return join_levels, join_labels, join_names
+
+
 class _OrderedMerge(_MergeOperation):
     _merge_type = 'ordered_merge'
 
@@ -1450,6 +1529,9 @@ def flip(xs):
 def _get_multiindex_indexer(join_keys, index, sort):
     from functools import partial
 
+    def i8copy(a):
+        return a.astype('i8', subok=False, copy=True)
+
     # bind `sort` argument
     fkeys = partial(_factorize_keys, sort=sort)
 
@@ -1458,7 +1540,6 @@ def _get_multiindex_indexer(join_keys, index, sort):
     if sort:
         rlab = list(map(np.take, rlab, index.labels))
     else:
-        i8copy = lambda a: a.astype('i8', subok=False, copy=True)
         rlab = list(map(i8copy, index.labels))
 
     # fix right labels if there were any nulls
@@ -1604,8 +1685,11 @@ def _sort_labels(uniques, left, right):
 
 def _get_join_keys(llab, rlab, shape, sort):
 
+    def pred(i):
+        return not is_int64_overflow_possible(shape[:i])
+
     # how many levels can be done without overflow
-    pred = lambda i: not is_int64_overflow_possible(shape[:i])
+
     nlev = next(filter(pred, range(len(shape), 0, -1)))
 
     # get keys for the first `nlev` levels