changed according to comments

topper-123 · topper-123 · commit 6172d9a6a09c · 2019-04-24T17:43:49.000+02:00
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -918,8 +918,7 @@ def _format_attrs(self):
         """
         Return a list of tuples of the (attr,formatted_value)
         """
-        attrs = []
-        attrs.append(('dtype', "'{}'".format(self.dtype)))
+        attrs = [('dtype', "'{}'".format(self.dtype))]
         if self.names is not None and any(self.names):
             attrs.append(('names', default_pprint(self.names)))
         max_seq_items = get_option('display.max_seq_items') or len(self)
@@ -935,29 +934,7 @@ def _format_data(self, name=None):
         Return the formatted data as a unicode string
         """
         return format_object_summary(self, self._formatter_func,
-                                     name=name, is_multi=True)
-
-    def __unicode__(self):
-        """
-        Return a string representation for this MultiIndex.
-
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        klass = self.__class__.__name__
-        data = self._format_data()
-        attrs = self._format_attrs()
-        space = self._format_space()
-
-        prepr = (",%s" % space).join("%s=%s" % (k, v) for k, v in attrs)
-
-        # no data provided, just attributes
-        if data is None:
-            data = ''
-
-        res = "%s(%s%s)" % (klass, data, prepr)
-
-        return res
+                                     name=name, line_break_each_value=True)
 
     def _format_native_types(self, na_rep='nan', **kwargs):
         new_levels = []
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter):
 
 
 def format_object_summary(obj, formatter, is_justify=True, name=None,
-                          indent_for_name=True, is_multi=False):
+                          indent_for_name=True, line_break_each_value=False):
     """
     Return the formatted obj as a unicode string
 
@@ -282,8 +282,10 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
     indent_for_name : bool, default True
         Whether subsequent lines should be be indented to
         align with the name.
-    is_multi : bool, default False
-        Is ``obj`` a :class:`MultiIndex` or not
+    line_break_each_value : bool, default False
+        If True, inserts a line break for each value of ``obj``.
+        If False, only break lines when a line of values gets wider
+        than the display width.
 
     Returns
     -------
@@ -308,7 +310,11 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
         space2 = "\n "  # space for the opening '['
 
     n = len(obj)
-    sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
+    if not line_break_each_value:
+        sep = ','
+    else:
+        # If we want to align on each value, we need a different separator.
+        sep = (',\n ' + ' ' * len(name))
     max_seq_items = get_option('display.max_seq_items') or n
 
     # are we a truncated display
@@ -336,10 +342,10 @@ def best_len(values):
 
     if n == 0:
         summary = '[]{}'.format(close)
-    elif n == 1 and not is_multi:
+    elif n == 1 and not line_break_each_value:
         first = formatter(obj[0])
         summary = '[{}]{}'.format(first, close)
-    elif n == 2 and not is_multi:
+    elif n == 2 and not line_break_each_value:
         first = formatter(obj[0])
         last = formatter(obj[-1])
         summary = '[{}, {}]{}'.format(first, last, close)
@@ -355,22 +361,31 @@ def best_len(values):
 
         # adjust all values to max length if needed
         if is_justify:
-            head, tail = _justify(head, tail, display_width, best_len,
-                                  is_truncated, is_multi)
-        if is_multi:
+            if line_break_each_value:
+                head, tail = _justify(head, tail)
+            elif (is_truncated or not (len(', '.join(head)) < display_width and
+                                       len(', '.join(tail)) < display_width)):
+                max_length = max(best_len(head), best_len(tail))
+                head = [x.rjust(max_length) for x in head]
+                tail = [x.rjust(max_length) for x in tail]
+            # If we are not truncated and we are only a single
+            # line, then don't justify
+
+        if line_break_each_value:
+            # show fewer items per tuple until a row fits within max_space
             max_space = display_width - len(space2)
             item = tail[0]
-            for i in reversed(range(1, len(item) + 1)):
-                if len(_pprint_seq(item, max_seq_items=i)) < max_space:
+            for max_items in reversed(range(1, len(item) + 1)):
+                if len(_pprint_seq(item, max_seq_items=max_items)) < max_space:
                     break
-            head = [_pprint_seq(x, max_seq_items=i) for x in head]
-            tail = [_pprint_seq(x, max_seq_items=i) for x in tail]
+            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
+            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
 
         summary = ""
         line = space2
 
-        for i in range(len(head)):
-            word = head[i] + sep + ' '
+        for max_items in range(len(head)):
+            word = head[max_items] + sep + ' '
             summary, line = _extend_line(summary, line, word,
                                          display_width, space2)
 
@@ -379,8 +394,8 @@ def best_len(values):
             summary += line.rstrip() + space2 + '...'
             line = space2
 
-        for i in range(len(tail) - 1):
-            word = tail[i] + sep + ' '
+        for max_items in range(len(tail) - 1):
+            word = tail[max_items] + sep + ' '
             summary, line = _extend_line(summary, line, word,
                                          display_width, space2)
 
@@ -394,7 +409,7 @@ def best_len(values):
         close = ']' + close.rstrip(' ')
         summary += close
 
-        if len(summary) > (display_width) or is_multi:
+        if len(summary) > (display_width) or line_break_each_value:
             summary += space1
         else:  # one row
             summary += ' '
@@ -405,50 +420,41 @@ def best_len(values):
     return summary
 
 
-def _justify(head, tail, display_width, best_len,
-             is_truncated=False, is_multi=False):
-    """
-    Justify each item in head and tail, so they align properly.
+def _justify(head, tail):
     """
-    if is_multi:
-        max_length = _max_level_item_length(head + tail)
-        head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in head]
-        tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in tail]
-    elif (is_truncated or not (len(', '.join(head)) < display_width and
-                               len(', '.join(tail)) < display_width)):
-        max_length = max(best_len(head), best_len(tail))
-        head = [x.rjust(max_length) for x in head]
-        tail = [x.rjust(max_length) for x in tail]
-
-    return head, tail
-
-
-def _max_level_item_length(seq):
-    """
-    For each position for the sequences in ``seq``, find the largest length.
-
-    Used for justifying individual values in a :class:`pandas.MultiIndex`.
+    Justify each item in each list-like in head and tail, so each item
+    right-aligns when the two list-likes are stacked vertically.
 
     Parameters
     ----------
-    seq : list-like of list-likes of strings
+    head : list-like of list-likes of strings
+    tail : list-like of list-likes of strings
 
     Returns
     -------
-    max_length : list of ints
+    head : list of tuples of strings
+    tail : list of tuples of strings
 
     Examples
     --------
-    >>> _max_level_item_length([['s', 'ab'], ['abc', 'a']])
-    [3, 2]
+    >>> _justify([['a', 'b']], [['abc', 'abcd']])
+    ([('  a', '   b')], [('abc', 'abcd')])
     """
-    max_length = [0] * len(seq[0])
-    for inner_seq in seq:
+    combined = head + tail  # type: Sequence[Sequence[str]]
+
+    # For each position for the sequences in ``combined``,
+    # find the length of the largest string.
+    max_length = [0] * len(combined[0])  # type: List[int]
+    for inner_seq in combined:
         length = [len(item) for item in inner_seq]
         max_length = [max(x, y) for x, y in zip(max_length, length)]
-    return max_length
+
+    # justify each item in each list-like in head and tail using max_length
+    head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in head]
+    tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in tail]
+    return head, tail
 
 
 def format_object_attrs(obj):
diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 from pandas import Index, MultiIndex
 
 
@@ -54,3 +55,28 @@ def holder():
 def compat_props():
     # a MultiIndex must have these properties associated with it
     return ['shape', 'ndim', 'size']
+
+
+@pytest.fixture
+def narrow_multi_index():
+    """
+    Return a MultiIndex that is narrower than the display (<80 characters).
+    """
+    n = 1000
+    ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
+    dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
+    return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
+                                     names=['a', 'b', 'dti'])
+
+
+@pytest.fixture
+def wide_multi_index():
+    """
+    Return a MultiIndex that is wider than the display (>80 characters).
+    """
+    n = 1000
+    ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
+    dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
+    levels = [ci, ci.codes + 9, dti, dti, dti]
+    names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
+    return pd.MultiIndex.from_arrays(levels, names=names)
diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py
@@ -58,31 +58,11 @@ def test_repr_with_unicode_data():
         assert "\\" not in repr(index)  # we don't want unicode-escaped
 
 
-@pytest.mark.skip(reason="#22511 will remove this test")
-def test_repr_roundtrip():
-
+def test_repr_roundtrip_raises():
     mi = MultiIndex.from_product([list('ab'), range(3)],
                                  names=['first', 'second'])
-    str(mi)
-
-    tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
-
-    mi_u = MultiIndex.from_product(
-        [list('ab'), range(3)], names=['first', 'second'])
-    result = eval(repr(mi_u))
-    tm.assert_index_equal(result, mi_u, exact=True)
-
-    # formatting
-    str(mi)
-
-    # long format
-    mi = MultiIndex.from_product([list('abcdefg'), range(10)],
-                                 names=['first', 'second'])
-
-    tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
-
-    result = eval(repr(mi_u))
-    tm.assert_index_equal(result, mi_u, exact=True)
+    with pytest.raises(TypeError):
+        eval(repr(mi))
 
 
 def test_unicode_string_with_unicode():
@@ -107,25 +87,16 @@ def test_repr_max_seq_item_setting(idx):
 
 class TestRepr(object):
 
-    def setup_class(self):
-        n = 1000
-        ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
-        dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
-        self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
-                                                   names=['a', 'b', 'dti'])
-
-        levels = [ci, ci.codes + 9, dti, dti, dti]
-        names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
-        self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names)
-
     def test_repr(self, idx):
         result = idx[:1].__repr__()
-        expected = """MultiIndex([('foo', 'one')],
+        expected = """\
+MultiIndex([('foo', 'one')],
            dtype='object', names=['first', 'second'])"""
         assert result == expected
 
         result = idx.__repr__()
-        expected = """MultiIndex([('foo', 'one'),
+        expected = """\
+MultiIndex([('foo', 'one'),
             ('foo', 'two'),
             ('bar', 'one'),
             ('baz', 'two'),
@@ -136,22 +107,24 @@ def test_repr(self, idx):
 
         with pd.option_context('display.max_seq_items', 5):
             result = idx.__repr__()
-            expected = """MultiIndex([('foo', 'one'),
+            expected = """\
+MultiIndex([('foo', 'one'),
             ('foo', 'two'),
             ...
             ('qux', 'one'),
             ('qux', 'two')],
            dtype='object', names=['first', 'second'], length=6)"""
             assert result == expected
 
-    def test_rjust(self):
-        result = self.narrow_mi[:1].__repr__()
+    def test_rjust(self, narrow_multi_index):
+        mi = narrow_multi_index
+        result = mi[:1].__repr__()
         expected = """\
 MultiIndex([('a', 9, '2000-01-01 00:00:00')],
            dtype='object', names=['a', 'b', 'dti'])"""
         assert result == expected
 
-        result = self.narrow_mi[::500].__repr__()
+        result = mi[::500].__repr__()
         expected = """\
 MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
             (  'a',  9, '2000-01-01 00:08:20'),
@@ -160,7 +133,7 @@ def test_rjust(self):
            dtype='object', names=['a', 'b', 'dti'])"""
         assert result == expected
 
-        result = self.narrow_mi.__repr__()
+        result = mi.__repr__()
         expected = """\
 MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
             (  'a',  9, '2000-01-01 00:00:01'),
@@ -186,13 +159,14 @@ def test_rjust(self):
            dtype='object', names=['a', 'b', 'dti'], length=2000)"""
         assert result == expected
 
-    def test_tuple_width(self):
-        result = self.wide_mi[:1].__repr__()
+    def test_tuple_width(self, wide_multi_index):
+        mi = wide_multi_index
+        result = mi[:1].__repr__()
         expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
            dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
         assert result == expected
 
-        result = self.wide_mi[:10].__repr__()
+        result = mi[:10].__repr__()
         expected = """\
 MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
             ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
@@ -207,7 +181,7 @@ def test_tuple_width(self):
            dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
         assert result == expected
 
-        result = self.wide_mi.__repr__()
+        result = mi.__repr__()
         expected = """\
 MultiIndex([(  'a',  9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
             (  'a',  9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),