Skip to content

Commit ce46223

Browse files
committed
ENH: better MultiIndex.__repr__
1 parent ea06f8d commit ce46223

File tree

3 files changed

+235
-27
lines changed

3 files changed

+235
-27
lines changed

pandas/core/indexes/multi.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
from pandas.core.indexes.frozen import FrozenList, _ensure_frozen
3030
import pandas.core.missing as missing
3131

32-
from pandas.io.formats.printing import pprint_thing
32+
from pandas.io.formats.printing import (
33+
default_pprint, format_object_summary, pprint_thing)
3334

3435
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3536
_index_doc_kwargs.update(
@@ -947,28 +948,57 @@ def _nbytes(self, deep=False):
947948

948949
# --------------------------------------------------------------------
949950
# Rendering Methods
951+
def _formatter_func(self, tup):
952+
"""
953+
Formats each item in tup according to its level's formatter function.
954+
"""
955+
formatter_funcs = [level._formatter_func for level in self.levels]
956+
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
950957

951958
def _format_attrs(self):
952959
"""
953960
Return a list of tuples of the (attr,formatted_value)
954961
"""
955-
attrs = [
956-
('levels', ibase.default_pprint(self._levels,
957-
max_seq_items=False)),
958-
('codes', ibase.default_pprint(self._codes,
959-
max_seq_items=False))]
960-
if com._any_not_none(*self.names):
961-
attrs.append(('names', ibase.default_pprint(self.names)))
962-
if self.sortorder is not None:
963-
attrs.append(('sortorder', ibase.default_pprint(self.sortorder)))
962+
attrs = []
963+
attrs.append(('dtype', "'{}'".format(self.dtype)))
964+
if self.names is not None and any(self.names):
965+
attrs.append(('names', default_pprint(self.names)))
966+
max_seq_items = get_option('display.max_seq_items') or len(self)
967+
if len(self) > max_seq_items:
968+
attrs.append(('length', len(self)))
964969
return attrs
965970

966971
def _format_space(self):
967-
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
972+
return " "
968973

969974
def _format_data(self, name=None):
970-
# we are formatting thru the attributes
971-
return None
975+
"""
976+
Return the formatted data as a unicode string
977+
"""
978+
return format_object_summary(self, self._formatter_func,
979+
name=name, is_multi=True)
980+
981+
def __unicode__(self):
982+
"""
983+
Return a string representation for this MultiIndex.
984+
985+
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
986+
py2/py3.
987+
"""
988+
klass = self.__class__.__name__
989+
data = self._format_data()
990+
attrs = self._format_attrs()
991+
space = self._format_space()
992+
993+
prepr = (",%s" % space).join("%s=%s" % (k, v) for k, v in attrs)
994+
995+
# no data provided, just attributes
996+
if data is None:
997+
data = ''
998+
999+
res = "%s(%s%s)" % (klass, data, prepr)
1000+
1001+
return res
9721002

9731003
def _format_native_types(self, na_rep='nan', **kwargs):
9741004
new_levels = []

pandas/io/formats/printing.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter):
265265

266266

267267
def format_object_summary(obj, formatter, is_justify=True, name=None,
268-
indent_for_name=True):
268+
indent_for_name=True, is_multi=False):
269269
"""
270270
Return the formatted obj as a unicode string
271271
@@ -282,6 +282,8 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
282282
indent_for_name : bool, default True
283283
Whether subsequent lines should be indented to
284284
align with the name.
285+
is_multi : bool, default False
286+
Whether ``obj`` is a :class:`MultiIndex`.
285287
286288
Returns
287289
-------
@@ -306,7 +308,7 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
306308
space2 = "\n " # space for the opening '['
307309

308310
n = len(obj)
309-
sep = ','
311+
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
310312
max_seq_items = get_option('display.max_seq_items') or n
311313

312314
# are we a truncated display
@@ -334,10 +336,10 @@ def best_len(values):
334336

335337
if n == 0:
336338
summary = '[]{}'.format(close)
337-
elif n == 1:
339+
elif n == 1 and not is_multi:
338340
first = formatter(obj[0])
339341
summary = '[{}]{}'.format(first, close)
340-
elif n == 2:
342+
elif n == 2 and not is_multi:
341343
first = formatter(obj[0])
342344
last = formatter(obj[-1])
343345
summary = '[{}, {}]{}'.format(first, last, close)
@@ -353,15 +355,16 @@ def best_len(values):
353355

354356
# adjust all values to max length if needed
355357
if is_justify:
356-
357-
# however, if we are not truncated and we are only a single
358-
# line, then don't justify
359-
if (is_truncated or
360-
not (len(', '.join(head)) < display_width and
361-
len(', '.join(tail)) < display_width)):
362-
max_len = max(best_len(head), best_len(tail))
363-
head = [x.rjust(max_len) for x in head]
364-
tail = [x.rjust(max_len) for x in tail]
358+
head, tail = _justify(head, tail, display_width, best_len,
359+
is_truncated, is_multi)
360+
if is_multi:
361+
max_space = display_width - len(space2)
362+
item = tail[0]
363+
for i in reversed(range(1, len(item) + 1)):
364+
if len(_pprint_seq(item, max_seq_items=i)) < max_space:
365+
break
366+
head = [_pprint_seq(x, max_seq_items=i) for x in head]
367+
tail = [_pprint_seq(x, max_seq_items=i) for x in tail]
365368

366369
summary = ""
367370
line = space2
@@ -391,7 +394,7 @@ def best_len(values):
391394
close = ']' + close.rstrip(' ')
392395
summary += close
393396

394-
if len(summary) > (display_width):
397+
if len(summary) > (display_width) or is_multi:
395398
summary += space1
396399
else: # one row
397400
summary += ' '
@@ -402,6 +405,52 @@ def best_len(values):
402405
return summary
403406

404407

408+
def _justify(head, tail, display_width, best_len,
409+
is_truncated=False, is_multi=False):
410+
"""
411+
Justify each item in head and tail, so they align properly.
412+
"""
413+
if is_multi:
414+
max_length = _max_level_item_length(head + tail)
415+
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
416+
for seq in head]
417+
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
418+
for seq in tail]
419+
elif (is_truncated or not (len(', '.join(head)) < display_width and
420+
len(', '.join(tail)) < display_width)):
421+
max_length = max(best_len(head), best_len(tail))
422+
head = [x.rjust(max_length) for x in head]
423+
tail = [x.rjust(max_length) for x in tail]
424+
425+
return head, tail
426+
427+
428+
def _max_level_item_length(seq):
429+
"""
430+
For each position for the sequences in ``seq``, find the largest length.
431+
432+
Used for justifying individual values in a :class:`pandas.MultiIndex`.
433+
434+
Parameters
435+
----------
436+
seq : list-like of list-likes of strings
437+
438+
Returns
439+
-------
440+
max_length : list of ints
441+
442+
Examples
443+
--------
444+
>>> _max_level_item_length([['s', 'ab'], ['abc', 'a']])
445+
[3, 2]
446+
"""
447+
max_length = [0] * len(seq[0])
448+
for inner_seq in seq:
449+
length = [len(item) for item in inner_seq]
450+
max_length = [max(x, y) for x, y in zip(max_length, length)]
451+
return max_length
452+
453+
405454
def format_object_attrs(obj):
406455
"""
407456
Return a list of tuples of the (attr, formatted_value)

pandas/tests/indexes/multi/test_format.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,132 @@ def test_repr_max_seq_item_setting(idx):
9494
with pd.option_context("display.max_seq_items", None):
9595
repr(idx)
9696
assert '...' not in str(idx)
97+
98+
99+
class TestRepr(object):
100+
101+
def setup_class(self):
102+
n = 1000
103+
ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
104+
dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
105+
self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
106+
names=['a', 'b', 'dti'])
107+
108+
levels = [ci, ci.codes + 9, dti, dti, dti]
109+
names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
110+
self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names)
111+
112+
def test_repr(self, idx):
113+
result = idx[:1].__repr__()
114+
expected = """MultiIndex([('foo', 'one')],
115+
dtype='object', names=['first', 'second'])"""
116+
assert result == expected
117+
118+
result = idx.__repr__()
119+
expected = """MultiIndex([('foo', 'one'),
120+
('foo', 'two'),
121+
('bar', 'one'),
122+
('baz', 'two'),
123+
('qux', 'one'),
124+
('qux', 'two')],
125+
dtype='object', names=['first', 'second'])"""
126+
assert result == expected
127+
128+
with pd.option_context('display.max_seq_items', 5):
129+
result = idx.__repr__()
130+
expected = """MultiIndex([('foo', 'one'),
131+
('foo', 'two'),
132+
...
133+
('qux', 'one'),
134+
('qux', 'two')],
135+
dtype='object', names=['first', 'second'], length=6)"""
136+
assert result == expected
137+
138+
def test_rjust(self):
139+
result = self.narrow_mi[:1].__repr__()
140+
expected = """\
141+
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
142+
dtype='object', names=['a', 'b', 'dti'])"""
143+
assert result == expected
144+
145+
result = self.narrow_mi[::500].__repr__()
146+
expected = """\
147+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
148+
( 'a', 9, '2000-01-01 00:08:20'),
149+
('abc', 10, '2000-01-01 00:16:40'),
150+
('abc', 10, '2000-01-01 00:25:00')],
151+
dtype='object', names=['a', 'b', 'dti'])"""
152+
assert result == expected
153+
154+
result = self.narrow_mi.__repr__()
155+
expected = """\
156+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
157+
( 'a', 9, '2000-01-01 00:00:01'),
158+
( 'a', 9, '2000-01-01 00:00:02'),
159+
( 'a', 9, '2000-01-01 00:00:03'),
160+
( 'a', 9, '2000-01-01 00:00:04'),
161+
( 'a', 9, '2000-01-01 00:00:05'),
162+
( 'a', 9, '2000-01-01 00:00:06'),
163+
( 'a', 9, '2000-01-01 00:00:07'),
164+
( 'a', 9, '2000-01-01 00:00:08'),
165+
( 'a', 9, '2000-01-01 00:00:09'),
166+
...
167+
('abc', 10, '2000-01-01 00:33:10'),
168+
('abc', 10, '2000-01-01 00:33:11'),
169+
('abc', 10, '2000-01-01 00:33:12'),
170+
('abc', 10, '2000-01-01 00:33:13'),
171+
('abc', 10, '2000-01-01 00:33:14'),
172+
('abc', 10, '2000-01-01 00:33:15'),
173+
('abc', 10, '2000-01-01 00:33:16'),
174+
('abc', 10, '2000-01-01 00:33:17'),
175+
('abc', 10, '2000-01-01 00:33:18'),
176+
('abc', 10, '2000-01-01 00:33:19')],
177+
dtype='object', names=['a', 'b', 'dti'], length=2000)"""
178+
assert result == expected
179+
180+
def test_tuple_width(self):
181+
result = self.wide_mi[:1].__repr__()
182+
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
183+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
184+
assert result == expected
185+
186+
result = self.wide_mi[:10].__repr__()
187+
expected = """\
188+
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
189+
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
190+
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
191+
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
192+
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
193+
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
194+
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
195+
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
196+
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
197+
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
198+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
199+
assert result == expected
200+
201+
result = self.wide_mi.__repr__()
202+
expected = """\
203+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
204+
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
205+
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
206+
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
207+
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
208+
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
209+
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
210+
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
211+
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
212+
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
213+
...
214+
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
215+
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
216+
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
217+
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
218+
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
219+
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
220+
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
221+
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
222+
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
223+
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
224+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa
225+
assert result == expected

0 commit comments

Comments
 (0)