From ce4622329f41f8860d6dce29a3bd8ff6483d58e4 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 25 Aug 2018 22:06:20 +0100 Subject: [PATCH 01/12] ENH: better MultiIndex.__repr__ --- pandas/core/indexes/multi.py | 56 +++++++--- pandas/io/formats/printing.py | 77 ++++++++++--- pandas/tests/indexes/multi/test_format.py | 129 ++++++++++++++++++++++ 3 files changed, 235 insertions(+), 27 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9217b388ce86b..9cc76eddcc51f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -29,7 +29,8 @@ from pandas.core.indexes.frozen import FrozenList, _ensure_frozen import pandas.core.missing as missing -from pandas.io.formats.printing import pprint_thing +from pandas.io.formats.printing import ( + default_pprint, format_object_summary, pprint_thing) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -947,28 +948,57 @@ def _nbytes(self, deep=False): # -------------------------------------------------------------------- # Rendering Methods + def _formatter_func(self, tup): + """ + Formats each item in tup according to its level's formatter function. + """ + formatter_funcs = [level._formatter_func for level in self.levels] + return tuple(func(val) for func, val in zip(formatter_funcs, tup)) def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ - attrs = [ - ('levels', ibase.default_pprint(self._levels, - max_seq_items=False)), - ('codes', ibase.default_pprint(self._codes, - max_seq_items=False))] - if com._any_not_none(*self.names): - attrs.append(('names', ibase.default_pprint(self.names))) - if self.sortorder is not None: - attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) + attrs = [] + attrs.append(('dtype', "'{}'".format(self.dtype))) + if self.names is not None and any(self.names): + attrs.append(('names', default_pprint(self.names))) + max_seq_items = get_option('display.max_seq_items') or len(self) + if len(self) > max_seq_items: + attrs.append(('length', len(self))) return attrs def _format_space(self): - return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + return " " def _format_data(self, name=None): - # we are formatting thru the attributes - return None + """ + Return the formatted data as a unicode string + """ + return format_object_summary(self, self._formatter_func, + name=name, is_multi=True) + + def __unicode__(self): + """ + Return a string representation for this MultiIndex. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + klass = self.__class__.__name__ + data = self._format_data() + attrs = self._format_attrs() + space = self._format_space() + + prepr = (",%s" % space).join("%s=%s" % (k, v) for k, v in attrs) + + # no data provided, just attributes + if data is None: + data = '' + + res = "%s(%s%s)" % (klass, data, prepr) + + return res def _format_native_types(self, na_rep='nan', **kwargs): new_levels = [] diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index bee66fcbfaa82..2f5ca2596fc15 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter): def format_object_summary(obj, formatter, is_justify=True, name=None, - indent_for_name=True): + indent_for_name=True, is_multi=False): """ Return the formatted obj as a unicode string @@ -282,6 +282,8 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, indent_for_name : bool, default True Whether subsequent lines should be be indented to align with the name. + is_multi : bool, default False + Is ``obj`` a :class:`MultiIndex` or not Returns ------- @@ -306,7 +308,7 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, space2 = "\n " # space for the opening '[' n = len(obj) - sep = ',' + sep = ',' if not is_multi else (',\n ' + ' ' * len(name)) max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display @@ -334,10 +336,10 @@ def best_len(values): if n == 0: summary = '[]{}'.format(close) - elif n == 1: + elif n == 1 and not is_multi: first = formatter(obj[0]) summary = '[{}]{}'.format(first, close) - elif n == 2: + elif n == 2 and not is_multi: first = formatter(obj[0]) last = formatter(obj[-1]) summary = '[{}, {}]{}'.format(first, last, close) @@ -353,15 +355,16 @@ def best_len(values): # adjust all values to max length if needed if is_justify: - - # however, if we are not truncated and we are only a single - # line, then don't justify - if (is_truncated or - not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_len = max(best_len(head), best_len(tail)) - head = [x.rjust(max_len) for x in head] - tail = [x.rjust(max_len) for x in tail] + head, tail = _justify(head, tail, display_width, best_len, + is_truncated, is_multi) + if is_multi: + max_space = display_width - len(space2) + item = tail[0] + for i in reversed(range(1, len(item) + 1)): + if len(_pprint_seq(item, max_seq_items=i)) < max_space: + break + head = [_pprint_seq(x, max_seq_items=i) for x in head] + tail = [_pprint_seq(x, max_seq_items=i) for x in tail] summary = "" line = space2 @@ -391,7 +394,7 @@ def best_len(values): close = ']' + close.rstrip(' ') summary += close - if len(summary) > (display_width): + if len(summary) > (display_width) or is_multi: summary += space1 else: # one row summary += ' ' @@ -402,6 +405,52 @@ def best_len(values): return summary +def _justify(head, tail, display_width, best_len, + is_truncated=False, is_multi=False): + """ + Justify each item in head and tail, so they align properly. + """ + if is_multi: + max_length = _max_level_item_length(head + tail) + head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in head] + tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in tail] + elif (is_truncated or not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + + return head, tail + + +def _max_level_item_length(seq): + """ + For each position for the sequences in ``seq``, find the largest length. + + Used for justifying individual values in a :class:`pandas.MultiIndex`. + + Parameters + ---------- + seq : list-like of list-likes of strings + + Returns + ------- + max_length : list of ints + + Examples + -------- + >>> _max_level_item_length([['s', 'ab'], ['abc', 'a']]) + [3, 2] + """ + max_length = [0] * len(seq[0]) + for inner_seq in seq: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + return max_length + + def format_object_attrs(obj): """ Return a list of tuples of the (attr, formatted_value) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index c320cb32b856c..fb3da87a971db 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -94,3 +94,132 @@ def test_repr_max_seq_item_setting(idx): with pd.option_context("display.max_seq_items", None): repr(idx) assert '...' not in str(idx) + + +class TestRepr(object): + + def setup_class(self): + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], + names=['a', 'b', 'dti']) + + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] + self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names) + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """MultiIndex([('foo', 'one')], + dtype='object', names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + dtype='object', names=['first', 'second'])""" + assert result == expected + + with pd.option_context('display.max_seq_items', 5): + result = idx.__repr__() + expected = """MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... + ('qux', 'one'), + ('qux', 'two')], + dtype='object', names=['first', 'second'], length=6)""" + assert result == expected + + def test_rjust(self): + result = self.narrow_mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + dtype='object', names=['a', 'b', 'dti'])""" + assert result == expected + + result = self.narrow_mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + dtype='object', names=['a', 'b', 'dti'])""" + assert result == expected + + result = self.narrow_mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... + ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + dtype='object', names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self): + result = self.wide_mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = self.wide_mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = self.wide_mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... + ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa + assert result == expected From 7bc53648ff099c340a847d8614a43c04a648ff9f Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 31 Aug 2018 15:15:38 +0100 Subject: [PATCH 02/12] changed according to comments --- pandas/core/indexes/multi.py | 27 +----- pandas/io/formats/printing.py | 104 ++++++++++++---------- pandas/tests/indexes/multi/conftest.py | 26 ++++++ pandas/tests/indexes/multi/test_format.py | 64 ++++--------- 4 files changed, 102 insertions(+), 119 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9cc76eddcc51f..630c07f2fc119 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -959,8 +959,7 @@ def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ - attrs = [] - attrs.append(('dtype', "'{}'".format(self.dtype))) + attrs = [('dtype', "'{}'".format(self.dtype))] if self.names is not None and any(self.names): attrs.append(('names', default_pprint(self.names))) max_seq_items = get_option('display.max_seq_items') or len(self) @@ -976,29 +975,7 @@ def _format_data(self, name=None): Return the formatted data as a unicode string """ return format_object_summary(self, self._formatter_func, - name=name, is_multi=True) - - def __unicode__(self): - """ - Return a string representation for this MultiIndex. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. - """ - klass = self.__class__.__name__ - data = self._format_data() - attrs = self._format_attrs() - space = self._format_space() - - prepr = (",%s" % space).join("%s=%s" % (k, v) for k, v in attrs) - - # no data provided, just attributes - if data is None: - data = '' - - res = "%s(%s%s)" % (klass, data, prepr) - - return res + name=name, line_break_each_value=True) def _format_native_types(self, na_rep='nan', **kwargs): new_levels = [] diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 2f5ca2596fc15..0f2a067e52702 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter): def format_object_summary(obj, formatter, is_justify=True, name=None, - indent_for_name=True, is_multi=False): + indent_for_name=True, line_break_each_value=False): """ Return the formatted obj as a unicode string @@ -282,8 +282,10 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, indent_for_name : bool, default True Whether subsequent lines should be be indented to align with the name. - is_multi : bool, default False - Is ``obj`` a :class:`MultiIndex` or not + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when the a line of values gets wider + than the display width Returns ------- @@ -308,7 +310,11 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, space2 = "\n " # space for the opening '[' n = len(obj) - sep = ',' if not is_multi else (',\n ' + ' ' * len(name)) + if not line_break_each_value: + sep = ',' + else: + # If we want to align on each value, we need a different separator. + sep = (',\n ' + ' ' * len(name)) max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display @@ -336,10 +342,10 @@ def best_len(values): if n == 0: summary = '[]{}'.format(close) - elif n == 1 and not is_multi: + elif n == 1 and not line_break_each_value: first = formatter(obj[0]) summary = '[{}]{}'.format(first, close) - elif n == 2 and not is_multi: + elif n == 2 and not line_break_each_value: first = formatter(obj[0]) last = formatter(obj[-1]) summary = '[{}, {}]{}'.format(first, last, close) @@ -355,22 +361,31 @@ def best_len(values): # adjust all values to max length if needed if is_justify: - head, tail = _justify(head, tail, display_width, best_len, - is_truncated, is_multi) - if is_multi: + if line_break_each_value: + head, tail = _justify(head, tail) + elif (is_truncated or not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + # If we are not truncated and we are only a single + # line, then don't justify + + if line_break_each_value: + # truncate vertically if wider than max_space max_space = display_width - len(space2) item = tail[0] - for i in reversed(range(1, len(item) + 1)): - if len(_pprint_seq(item, max_seq_items=i)) < max_space: + for max_items in reversed(range(1, len(item) + 1)): + if len(_pprint_seq(item, max_seq_items=max_items)) < max_space: break - head = [_pprint_seq(x, max_seq_items=i) for x in head] - tail = [_pprint_seq(x, max_seq_items=i) for x in tail] + head = [_pprint_seq(x, max_seq_items=max_items) for x in head] + tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] summary = "" line = space2 - for i in range(len(head)): - word = head[i] + sep + ' ' + for max_items in range(len(head)): + word = head[max_items] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) @@ -379,8 +394,8 @@ def best_len(values): summary += line.rstrip() + space2 + '...' line = space2 - for i in range(len(tail) - 1): - word = tail[i] + sep + ' ' + for max_items in range(len(tail) - 1): + word = tail[max_items] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) @@ -394,7 +409,7 @@ def best_len(values): close = ']' + close.rstrip(' ') summary += close - if len(summary) > (display_width) or is_multi: + if len(summary) > (display_width) or line_break_each_value: summary += space1 else: # one row summary += ' ' @@ -405,50 +420,41 @@ def best_len(values): return summary -def _justify(head, tail, display_width, best_len, - is_truncated=False, is_multi=False): - """ - Justify each item in head and tail, so they align properly. +def _justify(head, tail): """ - if is_multi: - max_length = _max_level_item_length(head + tail) - head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) - for seq in head] - tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) - for seq in tail] - elif (is_truncated or not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_length = max(best_len(head), best_len(tail)) - head = [x.rjust(max_length) for x in head] - tail = [x.rjust(max_length) for x in tail] - - return head, tail - - -def _max_level_item_length(seq): - """ - For each position for the sequences in ``seq``, find the largest length. - - Used for justifying individual values in a :class:`pandas.MultiIndex`. + Justify each item in each list-like in head and tail, so each item + right-aligns when the two list-likes are stacked vertically. Parameters ---------- - seq : list-like of list-likes of strings + head : list-like of list-likes of strings + tail : list-like of list-likes of strings Returns ------- - max_length : list of ints + head : list of tuples of strings + tail : list of tuples of strings Examples -------- - >>> _max_level_item_length([['s', 'ab'], ['abc', 'a']]) - [3, 2] + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) """ - max_length = [0] * len(seq[0]) - for inner_seq in seq: + combined = head + tail # type: Sequence[Sequence[str]] + + # For each position for the sequences in ``combined``, + # find the length of the largest string. + max_length = [0] * len(combined[0]) # type: List[int] + for inner_seq in combined: length = [len(item) for item in inner_seq] max_length = [max(x, y) for x, y in zip(max_length, length)] - return max_length + + # justify each item in each list-like in head and tail using max_length + head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in head] + tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in tail] + return head, tail def format_object_attrs(obj): diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 956d2e6cc17e3..307772347e8f5 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import Index, MultiIndex @@ -52,3 +53,28 @@ def holder(): def compat_props(): # a MultiIndex must have these properties associated with it return ['shape', 'ndim', 'size'] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex that is narrower than the display (<80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], + names=['a', 'b', 'dti']) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] + return pd.MultiIndex.from_arrays(levels, names=names) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index fb3da87a971db..998bc5797bfdc 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -55,31 +55,11 @@ def test_repr_with_unicode_data(): assert "\\" not in repr(index) # we don't want unicode-escaped -@pytest.mark.skip(reason="#22511 will remove this test") -def test_repr_roundtrip(): - +def test_repr_roundtrip_raises(): mi = MultiIndex.from_product([list('ab'), range(3)], names=['first', 'second']) - str(mi) - - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - - mi_u = MultiIndex.from_product( - [list('ab'), range(3)], names=['first', 'second']) - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - # formatting - str(mi) - - # long format - mi = MultiIndex.from_product([list('abcdefg'), range(10)], - names=['first', 'second']) - - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) + with pytest.raises(TypeError): + eval(repr(mi)) def test_unicode_string_with_unicode(): @@ -98,25 +78,16 @@ def test_repr_max_seq_item_setting(idx): class TestRepr(object): - def setup_class(self): - n = 1000 - ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) - dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) - self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], - names=['a', 'b', 'dti']) - - levels = [ci, ci.codes + 9, dti, dti, dti] - names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] - self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names) - def test_repr(self, idx): result = idx[:1].__repr__() - expected = """MultiIndex([('foo', 'one')], + expected = """\ +MultiIndex([('foo', 'one')], dtype='object', names=['first', 'second'])""" assert result == expected result = idx.__repr__() - expected = """MultiIndex([('foo', 'one'), + expected = """\ +MultiIndex([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), @@ -127,7 +98,8 @@ def test_repr(self, idx): with pd.option_context('display.max_seq_items', 5): result = idx.__repr__() - expected = """MultiIndex([('foo', 'one'), + expected = """\ +MultiIndex([('foo', 'one'), ('foo', 'two'), ... ('qux', 'one'), @@ -135,14 +107,15 @@ def test_repr(self, idx): dtype='object', names=['first', 'second'], length=6)""" assert result == expected - def test_rjust(self): - result = self.narrow_mi[:1].__repr__() + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], dtype='object', names=['a', 'b', 'dti'])""" assert result == expected - result = self.narrow_mi[::500].__repr__() + result = mi[::500].__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), ( 'a', 9, '2000-01-01 00:08:20'), @@ -151,7 +124,7 @@ def test_rjust(self): dtype='object', names=['a', 'b', 'dti'])""" assert result == expected - result = self.narrow_mi.__repr__() + result = mi.__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), ( 'a', 9, '2000-01-01 00:00:01'), @@ -177,13 +150,14 @@ def test_rjust(self): dtype='object', names=['a', 'b', 'dti'], length=2000)""" assert result == expected - def test_tuple_width(self): - result = self.wide_mi[:1].__repr__() + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected - result = self.wide_mi[:10].__repr__() + result = mi[:10].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), @@ -198,7 +172,7 @@ def test_tuple_width(self): dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected - result = self.wide_mi.__repr__() + result = mi.__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), From b502d8efdfca6d32ca1c1a6130276c065fcea6cd Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 Sep 2018 09:40:10 +0100 Subject: [PATCH 03/12] inherit _format_attrs and _format_space --- pandas/core/indexes/multi.py | 15 --------------- pandas/io/formats/printing.py | 11 +++++++---- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 630c07f2fc119..813615bdb04d8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -955,21 +955,6 @@ def _formatter_func(self, tup): formatter_funcs = [level._formatter_func for level in self.levels] return tuple(func(val) for func, val in zip(formatter_funcs, tup)) - def _format_attrs(self): - """ - Return a list of tuples of the (attr,formatted_value) - """ - attrs = [('dtype', "'{}'".format(self.dtype))] - if self.names is not None and any(self.names): - attrs.append(('names', default_pprint(self.names))) - max_seq_items = get_option('display.max_seq_items') or len(self) - if len(self) > max_seq_items: - attrs.append(('length', len(self))) - return attrs - - def _format_space(self): - return " " - def _format_data(self, name=None): """ Return the formatted data as a unicode string diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 0f2a067e52702..135cf29d53817 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -310,11 +310,12 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, space2 = "\n " # space for the opening '[' n = len(obj) - if not line_break_each_value: - sep = ',' + if line_break_each_value: + # If we want to vertically align on each value of obj, we need to + # separate values by a line break and indent the values + sep = ',\n ' + ' ' * len(name) else: - # If we want to align on each value, we need a different separator. - sep = (',\n ' + ' ' * len(name)) + sep = ',' max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display @@ -477,6 +478,8 @@ def format_object_attrs(obj): attrs.append(('dtype', "'{}'".format(obj.dtype))) if getattr(obj, 'name', None) is not None: attrs.append(('name', default_pprint(obj.name))) + elif getattr(obj, 'names', None) is not None and any(obj.names): + attrs.append(('names', default_pprint(obj.names))) max_seq_items = get_option('display.max_seq_items') or len(obj) if len(obj) > max_seq_items: attrs.append(('length', len(obj))) From 0590f46df5660f40f4d501bf9c6e837b633b36ec Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 4 Sep 2018 19:05:26 +0100 Subject: [PATCH 04/12] changed according to comments --- pandas/io/formats/printing.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 135cf29d53817..e1552a0531b3c 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -363,9 +363,13 @@ def best_len(values): # adjust all values to max length if needed if is_justify: if line_break_each_value: + # Justify each string in the values of head and tail, so the + # strings will right align when head and tail are stacked + # vertically. head, tail = _justify(head, tail) elif (is_truncated or not (len(', '.join(head)) < display_width and len(', '.join(tail)) < display_width)): + # Each string in head and tail should align with each other max_length = max(best_len(head), best_len(tail)) head = [x.rjust(max_length) for x in head] tail = [x.rjust(max_length) for x in tail] @@ -373,11 +377,15 @@ def best_len(values): # line, then don't justify if line_break_each_value: - # truncate vertically if wider than max_space + # Now head and tail are of type List[Tuple[str]]. Below we + # convert them into List[str], so there will be one string per + # value. Also truncate items horizontally if wider than + # max_space max_space = display_width - len(space2) - item = tail[0] - for max_items in reversed(range(1, len(item) + 1)): - if len(_pprint_seq(item, max_seq_items=max_items)) < max_space: + value = tail[0] + for max_items in reversed(range(1, len(value) + 1)): + pprinted_seq = _pprint_seq(value, max_seq_items=max_items) + if len(pprinted_seq) < max_space: break head = [_pprint_seq(x, max_seq_items=max_items) for x in head] tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] From ff0e93ba28e4e0b9986ac97858b06c1d7a690ba5 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 8 Sep 2018 12:28:36 +0100 Subject: [PATCH 05/12] minor update for doc strings --- pandas/io/formats/printing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index e1552a0531b3c..98ba2675509ca 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -287,6 +287,8 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, If False, only break lines when the a line of values gets wider than the display width + .. versionadded:: 0.24.0 + Returns ------- summary string From b36da1cb71d522a240591252c3b555c77c56dde4 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 9 Sep 2018 12:08:05 +0100 Subject: [PATCH 06/12] Update doc string examples and docs --- doc/source/user_guide/advanced.rst | 9 +- pandas/core/indexes/multi.py | 147 +++++++++++++++++++++++------ 2 files changed, 121 insertions(+), 35 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 0e68cddde8bc7..a5cc6030b1c48 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -182,15 +182,15 @@ on a deeper level. Defined Levels ~~~~~~~~~~~~~~ -The repr of a ``MultiIndex`` shows all the defined levels of an index, even +The ``MultiIndex`` keeps all the defined levels of an index, even if they are not actually used. When slicing an index, you may notice this. For example: .. ipython:: python -   df.columns # original MultiIndex +   df.columns.levels # original MultiIndex - df[['foo','qux']].columns # sliced + df[['foo','qux']].columns.levels # sliced This is done to avoid a recomputation of the levels in order to make slicing highly performant. If you want to see only the used levels, you can use the @@ -210,7 +210,8 @@ To reconstruct the ``MultiIndex`` with only the used levels, the .. ipython:: python - df[['foo', 'qux']].columns.remove_unused_levels() + new_mi = df[['foo', 'qux']].columns.remove_unused_levels() + new_mi.levels Data alignment and using ``reindex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 813615bdb04d8..368ec0eaa1c1a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -421,9 +421,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None): >>> tuples = [(1, 'red'), (1, 'blue'), ... (2, 'red'), (2, 'blue')] >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], - names=['number', 'color']) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + dtype='object', names=['number', 'color']) """ if not is_list_like(tuples): raise TypeError('Input must be a list / sequence of tuple-likes.') @@ -478,9 +480,13 @@ def from_product(cls, iterables, sortorder=None, names=None): >>> colors = ['green', 'purple'] >>> pd.MultiIndex.from_product([numbers, colors], ... names=['number', 'color']) - MultiIndex(levels=[[0, 1, 2], ['green', 'purple']], - codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + dtype='object', names=['number', 'color']) """ from pandas.core.arrays.categorical import _factorize_from_iterables from pandas.core.reshape.util import cartesian_product @@ -664,6 +670,7 @@ def set_levels(self, levels, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')], names=['foo', 'bar']) +<<<<<<< HEAD >>> idx.set_levels([['a','b'], [1,2]]) MultiIndex(levels=[['a', 'b'], [1, 2]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], @@ -680,6 +687,32 @@ def set_levels(self, levels, level=None, inplace=False, MultiIndex(levels=[['a', 'b'], [1, 2]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=['foo', 'bar']) +======= + >>> idx.set_levels([['a', 'b'], [1, 2]]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2)], + dtype='object', names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b'], level=0) + MultiIndex([('a', 'one'), + ('a', 'two'), + ('b', 'one'), + ('b', 'two')], + dtype='object', names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b'], level='bar') + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b')], + dtype='object', names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b'], [1, 2]], level=[0, 1]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2)], + dtype='object', names=['foo', 'bar']) +>>>>>>> Update doc string examples and docs """ if is_list_like(levels) and not isinstance(levels, Index): levels = list(levels) @@ -783,6 +816,7 @@ def set_codes(self, codes, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')], names=['foo', 'bar']) +<<<<<<< HEAD >>> idx.set_codes([[1,0,1,0], [0,0,1,1]]) MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[1, 0, 1, 0], [0, 0, 1, 1]], @@ -799,6 +833,32 @@ def set_codes(self, codes, level=None, inplace=False, MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[1, 0, 1, 0], [0, 0, 1, 1]], names=['foo', 'bar']) +======= + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + dtype='object', names=['foo', 'bar']) + >>> idx.set_codes([1, 0, 1, 0], level=0) + MultiIndex([(2, 'one'), + (1, 'two'), + (2, 'one'), + (1, 'two')], + dtype='object', names=['foo', 'bar']) + >>> idx.set_codes([0, 0, 1, 1], level='bar') + MultiIndex([(1, 'one'), + (1, 'one'), + (2, 'two'), + (2, 'two')], + dtype='object', names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + dtype='object', names=['foo', 'bar']) +>>>>>>> Update doc string examples and docs """ if level is not None and not is_list_like(level): if not is_list_like(codes): @@ -1547,9 +1607,19 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) >>> idx.to_hierarchical(3) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + MultiIndex([(1, 'one'), + (1, 'one'), + (1, 'one'), + (1, 'two'), + (1, 'two'), + (1, 'two'), + (2, 'one'), + (2, 'one'), + (2, 'one'), + (2, 'two'), + (2, 'two'), + (2, 'two')], + dtype='object') """ levels = self.levels codes = [np.repeat(level_codes, n_repeat) for @@ -1640,16 +1710,21 @@ def _sort_levels_monotonic(self): Examples -------- - >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - >>> i - MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - - >>> i.sort_monotonic() - MultiIndex(levels=[['a', 'b'], ['aa', 'bb']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]]) + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + dtype='object') + >>> mi.sort_values() + MultiIndex([('a', 'aa'), + ('a', 'bb'), + ('b', 'aa'), + ('b', 'bb')], + dtype='object') """ if self.is_lexsorted() and self.is_monotonic: @@ -1698,20 +1773,25 @@ def remove_unused_levels(self): Examples -------- - >>> i = pd.MultiIndex.from_product([range(2), list('ab')]) - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi + MultiIndex([(0, 'a'), + (0, 'b'), + (1, 'a'), + (1, 'b')], + dtype='object') - >>> i[2:] - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[1, 1], [0, 1]]) + >>> mi[2:] + MultiIndex([(1, 'a'), + (1, 'b')], + dtype='object') The 0 from the first level is not represented and can be removed - >>> i[2:].remove_unused_levels() - MultiIndex(levels=[[1], ['a', 'b']], - codes=[[0, 0], [0, 1]]) + >>> mi2 = mi[2:].remove_unused_levels() + >>> mi2.levels + FrozenList([[1], ['a', 'b']]) """ new_levels = [] @@ -2018,11 +2098,16 @@ def swaplevel(self, i=-2, j=-1): >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) >>> mi - MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + dtype='object') >>> mi.swaplevel(0, 1) - MultiIndex(levels=[['bb', 'aa'], ['a', 'b']], - codes=[[0, 1, 0, 1], [0, 0, 1, 1]]) + MultiIndex([('bb', 'a'), + ('aa', 'a'), + ('bb', 'b'), + ('aa', 'b')], """ new_levels = list(self.levels) new_codes = list(self.codes) From 7a8512e813ff4ed7ce407d2d3b1038e3436681af Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 16 Sep 2018 08:26:24 +0100 Subject: [PATCH 07/12] Comment on support for py2 --- doc/source/user_guide/advanced.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index a5cc6030b1c48..fb9b9db428e34 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -182,7 +182,7 @@ on a deeper level. Defined Levels ~~~~~~~~~~~~~~ -The ``MultiIndex`` keeps all the defined levels of an index, even +The :class:`MultiIndex` keeps all the defined levels of an index, even if they are not actually used. When slicing an index, you may notice this. For example: From cb2f904798258a6659ce615877fb040fa5b36507 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 20 Sep 2018 19:18:09 +0100 Subject: [PATCH 08/12] Improve docs --- doc/source/whatsnew/v0.25.0.rst | 32 +++++++++++++++++++++ pandas/core/indexes/base.py | 50 ++++++++++++++++++++------------- pandas/core/indexes/multi.py | 22 +-------------- pandas/core/strings.py | 5 ++-- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7d123697d3d20..034ffa98ae5f1 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -74,6 +74,38 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca See :ref:`_groupby.aggregate.named` for more. + +.. _whatsnew_0240.enhancements.multi_index_repr: + +Better repr for MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Printing of :class:`MultiIndex` instances now shows tuples of each row and ensures +that the tuple items are vertically aligned, so it's now easier to understand +the structure of the ``MultiIndex``. (:issue:`13480`): + +The repr now looks like this: + +.. ipython:: python + + pd.MultiIndex.from_product([['a', 'abc'], range(500)]) + +Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and +``labels`` of the ``MultiIndex``, which was visually unappealing and made +the output more difficult to navigate: + +.. code-block:: ipython + + >>>pd.MultiIndex.from_product([['a', 'abc'], range(5)]) + MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], + labels=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) + +In the new repr, all values will be shown, if the number of rows is smaller +than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, +the output will truncate, if it's wider than :attr:`options.display.width` +(default: 80 characters). + + .. _whatsnew_0250.enhancements.other: Other Enhancements diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4fb9c4197109f..a584b79761afb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1322,17 +1322,24 @@ def set_names(self, names, level=None, inplace=False): >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], ... [2018, 2019]]) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]]) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + dtype='object') >>> idx.set_names(['kind', 'year'], inplace=True) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], - names=['kind', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + dtype='object', names=['kind', 'year']) >>> idx.set_names('species', level=0) - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], - names=['species', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + dtype='object', names=['species', 'year']) """ if level is not None and not isinstance(self, ABCMultiIndex): @@ -1393,13 +1400,17 @@ def rename(self, name, inplace=False): ... [2018, 2019]], ... names=['kind', 'year']) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], - names=['kind', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + dtype='object', names=['kind', 'year']) >>> idx.rename(['species', 'year']) - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], - names=['species', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + dtype='object', names=['species', 'year']) >>> idx.rename('species') Traceback (most recent call last): TypeError: Must pass list-like as `names`. @@ -5420,9 +5431,9 @@ def ensure_index_from_sequences(sequences, names=None): >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2']) - MultiIndex(levels=[['a'], ['a', 'b']], - codes=[[0, 0], [0, 1]], - names=['L1', 'L2']) + MultiIndex([('a', 'a'), + ('a', 'b')], + dtype='object', names=['L1', 'L2']) See Also -------- @@ -5461,8 +5472,9 @@ def ensure_index(index_like, copy=False): Index([('a', 'a'), ('b', 'c')], dtype='object') >>> ensure_index([['a', 'a'], ['b', 'c']]) - MultiIndex(levels=[['a'], ['b', 'c']], - codes=[[0, 0], [0, 1]]) + MultiIndex([('a', 'b'), + ('a', 'c')], + dtype='object') See Also -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 368ec0eaa1c1a..93ef276bd5fc4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -29,8 +29,7 @@ from pandas.core.indexes.frozen import FrozenList, _ensure_frozen import pandas.core.missing as missing -from pandas.io.formats.printing import ( - default_pprint, format_object_summary, pprint_thing) +from pandas.io.formats.printing import format_object_summary, pprint_thing _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -816,24 +815,6 @@ def set_codes(self, codes, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')], names=['foo', 'bar']) -<<<<<<< HEAD - >>> idx.set_codes([[1,0,1,0], [0,0,1,1]]) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=['foo', 'bar']) - >>> idx.set_codes([1,0,1,0], level=0) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 1, 0, 1]], - names=['foo', 'bar']) - >>> idx.set_codes([0,0,1,1], level='bar') - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[0, 0, 1, 1], [0, 0, 1, 1]], - names=['foo', 'bar']) - >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1]) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=['foo', 'bar']) -======= >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) MultiIndex([(2, 'one'), (1, 'one'), @@ -858,7 +839,6 @@ def set_codes(self, codes, level=None, inplace=False, (2, 'two'), (1, 'two')], dtype='object', names=['foo', 'bar']) ->>>>>>> Update doc string examples and docs """ if level is not None and not is_list_like(level): if not is_list_like(codes): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bd756491abd2f..edfd3e7cf2fed 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2548,8 +2548,9 @@ def rsplit(self, pat=None, n=-1, expand=False): Which will create a MultiIndex: >>> idx.str.partition() - MultiIndex(levels=[['X', 'Y'], [' '], ['123', '999']], - codes=[[0, 1], [0, 0], [0, 1]]) + MultiIndex([('X', ' ', '123'), + ('Y', ' ', '999')], + dtype='object') Or an index with tuples with ``expand=False``: From 7c846571cace10c007120b9b62949804a2e1789a Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 9 Dec 2018 20:00:47 +0000 Subject: [PATCH 09/12] don't show useless dtype in repr --- doc/source/whatsnew/v0.25.0.rst | 8 +- pandas/core/indexes/base.py | 13 +-- pandas/core/indexes/multi.py | 85 ++++++++++---------- pandas/io/formats/printing.py | 6 +- pandas/tests/indexes/multi/test_format.py | 18 ++--- pandas/tests/util/test_assert_index_equal.py | 22 ++++- 6 files changed, 83 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 034ffa98ae5f1..09224f8b199a4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -91,14 +91,14 @@ The repr now looks like this: pd.MultiIndex.from_product([['a', 'abc'], range(500)]) Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and -``labels`` of the ``MultiIndex``, which was visually unappealing and made +``codes`` of the ``MultiIndex``, which was visually unappealing and made the output more difficult to navigate: .. code-block:: ipython - >>>pd.MultiIndex.from_product([['a', 'abc'], range(5)]) - MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], - labels=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) + In [1]: pd.MultiIndex.from_product([['a', 'abc'], range(5)]) + Out[1]: MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], + ...: codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) In the new repr, all values will be shown, if the number of rows is smaller than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a584b79761afb..cd90ab63fb83d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1326,20 +1326,20 @@ def set_names(self, names, level=None, inplace=False): ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - dtype='object') + ) >>> idx.set_names(['kind', 'year'], inplace=True) >>> idx MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - dtype='object', names=['kind', 'year']) + names=['kind', 'year']) >>> idx.set_names('species', level=0) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - dtype='object', names=['species', 'year']) + names=['species', 'year']) """ if level is not None and not isinstance(self, ABCMultiIndex): @@ -1404,13 +1404,13 @@ def rename(self, name, inplace=False): ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - dtype='object', names=['kind', 'year']) + names=['kind', 'year']) >>> idx.rename(['species', 'year']) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - dtype='object', names=['species', 'year']) + names=['species', 'year']) >>> idx.rename('species') Traceback (most recent call last): TypeError: Must pass list-like as `names`. @@ -5433,7 +5433,7 @@ def ensure_index_from_sequences(sequences, names=None): names=['L1', 'L2']) MultiIndex([('a', 'a'), ('a', 'b')], - dtype='object', names=['L1', 'L2']) + names=['L1', 'L2']) See Also -------- @@ -5475,6 +5475,7 @@ def ensure_index(index_like, copy=False): MultiIndex([('a', 'b'), ('a', 'c')], dtype='object') + ) See Also -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 93ef276bd5fc4..42fc25e5223e2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -29,7 +29,8 @@ from pandas.core.indexes.frozen import FrozenList, _ensure_frozen import pandas.core.missing as missing -from pandas.io.formats.printing import format_object_summary, pprint_thing +from pandas.io.formats.printing import ( + format_object_attrs, format_object_summary, pprint_thing) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -193,8 +194,10 @@ class MultiIndex(Index): >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], names=['number', 'color']) See further examples for how to construct a MultiIndex in the doc strings @@ -359,8 +362,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None): -------- >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], names=['number', 'color']) """ error_msg = "Input must be a list / sequence of array-likes." @@ -424,7 +429,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): (1, 'blue'), (2, 'red'), (2, 'blue')], - dtype='object', names=['number', 'color']) + names=['number', 'color']) """ if not is_list_like(tuples): raise TypeError('Input must be a list / sequence of tuple-likes.') @@ -485,7 +490,7 @@ def from_product(cls, iterables, sortorder=None, names=None): (1, 'purple'), (2, 'green'), (2, 'purple')], - dtype='object', names=['number', 'color']) + names=['number', 'color']) """ from pandas.core.arrays.categorical import _factorize_from_iterables from pandas.core.reshape.util import cartesian_product @@ -543,15 +548,19 @@ def from_frame(cls, df, sortorder=None, names=None): 3 NJ Precip >>> pd.MultiIndex.from_frame(df) - MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], names=['a', 'b']) Using explicit names, instead of the column names >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) - MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], names=['state', 'observation']) """ if not isinstance(df, ABCDataFrame): @@ -669,49 +678,30 @@ def set_levels(self, levels, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')], names=['foo', 'bar']) -<<<<<<< HEAD - >>> idx.set_levels([['a','b'], [1,2]]) - MultiIndex(levels=[['a', 'b'], [1, 2]], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', 'bar']) - >>> idx.set_levels(['a','b'], level=0) - MultiIndex(levels=[['a', 'b'], ['one', 'two']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', 'bar']) - >>> idx.set_levels(['a','b'], level='bar') - MultiIndex(levels=[[1, 2], ['a', 'b']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', 'bar']) - >>> idx.set_levels([['a','b'], [1,2]], level=[0,1]) - MultiIndex(levels=[['a', 'b'], [1, 2]], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', 'bar']) -======= >>> idx.set_levels([['a', 'b'], [1, 2]]) MultiIndex([('a', 1), ('a', 2), ('b', 1), ('b', 2)], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_levels(['a', 'b'], level=0) MultiIndex([('a', 'one'), ('a', 'two'), ('b', 'one'), ('b', 'two')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_levels(['a', 'b'], level='bar') MultiIndex([(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_levels([['a', 'b'], [1, 2]], level=[0, 1]) MultiIndex([('a', 1), ('a', 2), ('b', 1), ('b', 2)], - dtype='object', names=['foo', 'bar']) ->>>>>>> Update doc string examples and docs + names=['foo', 'bar']) """ if is_list_like(levels) and not isinstance(levels, Index): levels = list(levels) @@ -820,25 +810,25 @@ def set_codes(self, codes, level=None, inplace=False, (1, 'one'), (2, 'two'), (1, 'two')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_codes([1, 0, 1, 0], level=0) MultiIndex([(2, 'one'), (1, 'two'), (2, 'one'), (1, 'two')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_codes([0, 0, 1, 1], level='bar') MultiIndex([(1, 'one'), (1, 'one'), (2, 'two'), (2, 'two')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - dtype='object', names=['foo', 'bar']) + names=['foo', 'bar']) """ if level is not None and not is_list_like(level): if not is_list_like(codes): @@ -1002,6 +992,12 @@ def _format_data(self, name=None): return format_object_summary(self, self._formatter_func, name=name, line_break_each_value=True) + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value). + """ + return format_object_attrs(self, include_dtype=False) + def _format_native_types(self, na_rep='nan', **kwargs): new_levels = [] new_codes = [] @@ -1599,7 +1595,7 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): (2, 'two'), (2, 'two'), (2, 'two')], - dtype='object') + ) """ levels = self.levels codes = [np.repeat(level_codes, n_repeat) for @@ -1697,14 +1693,14 @@ def _sort_levels_monotonic(self): ('a', 'aa'), ('b', 'bb'), ('b', 'aa')], - dtype='object') + ) >>> mi.sort_values() MultiIndex([('a', 'aa'), ('a', 'bb'), ('b', 'aa'), ('b', 'bb')], - dtype='object') + ) """ if self.is_lexsorted() and self.is_monotonic: @@ -1759,12 +1755,12 @@ def remove_unused_levels(self): (0, 'b'), (1, 'a'), (1, 'b')], - dtype='object') + ) >>> mi[2:] MultiIndex([(1, 'a'), (1, 'b')], - dtype='object') + ) The 0 from the first level is not represented and can be removed @@ -2082,12 +2078,13 @@ def swaplevel(self, i=-2, j=-1): ('a', 'aa'), ('b', 'bb'), ('b', 'aa')], - dtype='object') + ) >>> mi.swaplevel(0, 1) MultiIndex([('bb', 'a'), ('aa', 'a'), ('bb', 'b'), ('aa', 'b')], + ) """ new_levels = list(self.levels) new_codes = list(self.codes) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 98ba2675509ca..f8385385f4848 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -468,7 +468,7 @@ def _justify(head, tail): return head, tail -def format_object_attrs(obj): +def format_object_attrs(obj, include_dtype=True): """ Return a list of tuples of the (attr, formatted_value) for common attrs, including dtype, name, length @@ -477,6 +477,8 @@ def format_object_attrs(obj): ---------- obj : object must be iterable + include_dtype : bool + If False, dtype won't be in the returned list Returns ------- @@ -484,7 +486,7 @@ def format_object_attrs(obj): """ attrs = [] - if hasattr(obj, 'dtype'): + if hasattr(obj, 'dtype') and include_dtype: attrs.append(('dtype', "'{}'".format(obj.dtype))) if getattr(obj, 'name', None) is not None: attrs.append(('name', default_pprint(obj.name))) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 998bc5797bfdc..0915806eb8bf1 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -82,7 +82,7 @@ def test_repr(self, idx): result = idx[:1].__repr__() expected = """\ MultiIndex([('foo', 'one')], - dtype='object', names=['first', 'second'])""" + names=['first', 'second'])""" assert result == expected result = idx.__repr__() @@ -93,7 +93,7 @@ def test_repr(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - dtype='object', names=['first', 'second'])""" + names=['first', 'second'])""" assert result == expected with pd.option_context('display.max_seq_items', 5): @@ -104,7 +104,7 @@ def test_repr(self, idx): ... ('qux', 'one'), ('qux', 'two')], - dtype='object', names=['first', 'second'], length=6)""" + names=['first', 'second'], length=6)""" assert result == expected def test_rjust(self, narrow_multi_index): @@ -112,7 +112,7 @@ def test_rjust(self, narrow_multi_index): result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], - dtype='object', names=['a', 'b', 'dti'])""" + names=['a', 'b', 'dti'])""" assert result == expected result = mi[::500].__repr__() @@ -121,7 +121,7 @@ def test_rjust(self, narrow_multi_index): ( 'a', 9, '2000-01-01 00:08:20'), ('abc', 10, '2000-01-01 00:16:40'), ('abc', 10, '2000-01-01 00:25:00')], - dtype='object', names=['a', 'b', 'dti'])""" + names=['a', 'b', 'dti'])""" assert result == expected result = mi.__repr__() @@ -147,14 +147,14 @@ def test_rjust(self, narrow_multi_index): ('abc', 10, '2000-01-01 00:33:17'), ('abc', 10, '2000-01-01 00:33:18'), ('abc', 10, '2000-01-01 00:33:19')], - dtype='object', names=['a', 'b', 'dti'], length=2000)""" + names=['a', 'b', 'dti'], length=2000)""" assert result == expected def test_tuple_width(self, wide_multi_index): mi = wide_multi_index result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], - dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected result = mi[:10].__repr__() @@ -169,7 +169,7 @@ def test_tuple_width(self, wide_multi_index): ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], - dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected result = mi.__repr__() @@ -195,5 +195,5 @@ def test_tuple_width(self, wide_multi_index): ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], - dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa assert result == expected diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index ec9cbd104d751..28ee9030f5587 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -2,16 +2,30 @@ import pytest from pandas import Categorical, Index, MultiIndex, NaT -from pandas.util.testing import assert_index_equal +from pandas.util.testing import PY3, assert_index_equal def test_index_equal_levels_mismatch(): - msg = """Index are different + if PY3: + msg = """Index are different + +Index levels are different +\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), + \\('A', 2\\), + \\('B', 3\\), + \\('B', 4\\)\\], + \\)""" + else: + msg = """Index are different Index levels are different \\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 2, MultiIndex\\(levels=\\[\\['A', 'B'\\], \\[1, 2, 3, 4\\]\\], - codes=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" +\\[right\\]: 2, MultiIndex\\(\\[\\(u'A', 1\\), + \\(u'A', 2\\), + \\(u'B', 3\\), + \\(u'B', 4\\)\\], + \\)""" idx1 = Index([1, 2, 3]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), From 3ed412ad0d37b761b19d3cb7b414d91d4a48a15e Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 19 Mar 2019 19:44:02 +0100 Subject: [PATCH 10/12] adjust for comments --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/io/formats/printing.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 09224f8b199a4..f0177b45bfa2c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -75,7 +75,7 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca See :ref:`_groupby.aggregate.named` for more. -.. _whatsnew_0240.enhancements.multi_index_repr: +.. _whatsnew_0250.enhancements.multi_index_repr: Better repr for MultiIndex ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index f8385385f4848..00da7e94ef840 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -285,9 +285,9 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, line_break_each_value : bool, default False If True, inserts a line break for each value of ``obj``. If False, only break lines when the a line of values gets wider - than the display width + than the display width. - .. versionadded:: 0.24.0 + .. versionadded:: 0.25.0 Returns ------- @@ -433,8 +433,7 @@ def best_len(values): def _justify(head, tail): """ - Justify each item in each list-like in head and tail, so each item - right-aligns when the two list-likes are stacked vertically. + Justify items in head and tail, so they are right-aligned when stacked. Parameters ---------- @@ -443,8 +442,9 @@ def _justify(head, tail): Returns ------- - head : list of tuples of strings - tail : list of tuples of strings + tuple of list of tuples of strings + Same as head and tail, but items are right aligned when stacked + vertically. Examples -------- From ad4b0836d7bac05275e9435021293d0cc6459a6d Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 20 Mar 2019 04:13:46 +0100 Subject: [PATCH 11/12] Py2 tests not needed any more --- pandas/core/indexes/multi.py | 6 ++++-- pandas/io/formats/printing.py | 4 ++-- pandas/tests/util/test_assert_index_equal.py | 15 ++------------- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 42fc25e5223e2..0f457ba799928 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -802,8 +802,10 @@ def set_codes(self, codes, level=None, inplace=False, Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), - (2, 'one'), (2, 'two')], + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], names=['foo', 'bar']) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) MultiIndex([(2, 'one'), diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 00da7e94ef840..73d8586a0a8c9 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -451,11 +451,11 @@ def _justify(head, tail): >>> _justify([['a', 'b']], [['abc', 'abcd']]) ([(' a', ' b')], [('abc', 'abcd')]) """ - combined = head + tail # type: Sequence[Sequence[str]] + combined = head + tail # For each position for the sequences in ``combined``, # find the length of the largest string. - max_length = [0] * len(combined[0]) # type: List[int] + max_length = [0] * len(combined[0]) for inner_seq in combined: length = [len(item) for item in inner_seq] max_length = [max(x, y) for x, y in zip(max_length, length)] diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 28ee9030f5587..445d9c4e482b0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -2,12 +2,11 @@ import pytest from pandas import Categorical, Index, MultiIndex, NaT -from pandas.util.testing import PY3, assert_index_equal +from pandas.util.testing import assert_index_equal def test_index_equal_levels_mismatch(): - if PY3: - msg = """Index are different + msg = """Index are different Index levels are different \\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) @@ -16,16 +15,6 @@ def test_index_equal_levels_mismatch(): \\('B', 3\\), \\('B', 4\\)\\], \\)""" - else: - msg = """Index are different - -Index levels are different -\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 2, MultiIndex\\(\\[\\(u'A', 1\\), - \\(u'A', 2\\), - \\(u'B', 3\\), - \\(u'B', 4\\)\\], - \\)""" idx1 = Index([1, 2, 3]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), From 1d96c98c94a5f98dd44228b2770e9c64ddaabd64 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 23 Apr 2019 19:15:17 +0200 Subject: [PATCH 12/12] remove inheritance from 'object' --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/indexes/multi/test_format.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f0177b45bfa2c..3ccffdedcb895 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -92,7 +92,7 @@ The repr now looks like this: Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and ``codes`` of the ``MultiIndex``, which was visually unappealing and made -the output more difficult to navigate: +the output more difficult to navigate. For example (limiting the range to 5): .. code-block:: ipython diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 0915806eb8bf1..8315478d85125 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -76,7 +76,7 @@ def test_repr_max_seq_item_setting(idx): assert '...' not in str(idx) -class TestRepr(object): +class TestRepr: def test_repr(self, idx): result = idx[:1].__repr__()