Skip to content

Commit c445cd2

Browse files
committed
ENH: better MultiIndex.__repr__
1 parent d41c1da commit c445cd2

File tree

3 files changed

+235
-27
lines changed

3 files changed

+235
-27
lines changed

pandas/core/indexes/multi.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
from pandas.core.indexes.frozen import FrozenList, _ensure_frozen
3131
import pandas.core.missing as missing
3232

33-
from pandas.io.formats.printing import pprint_thing
33+
from pandas.io.formats.printing import (
34+
default_pprint, format_object_summary, pprint_thing)
3435

3536
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3637
_index_doc_kwargs.update(
@@ -906,28 +907,57 @@ def _nbytes(self, deep=False):
906907

907908
# --------------------------------------------------------------------
908909
# Rendering Methods
910+
def _formatter_func(self, tup):
911+
"""
912+
Formats each item in tup according to its level's formatter function.
913+
"""
914+
formatter_funcs = [level._formatter_func for level in self.levels]
915+
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
909916

910917
def _format_attrs(self):
911918
"""
912919
Return a list of tuples of the (attr, formatted_value)
913920
"""
914-
attrs = [
915-
('levels', ibase.default_pprint(self._levels,
916-
max_seq_items=False)),
917-
('codes', ibase.default_pprint(self._codes,
918-
max_seq_items=False))]
919-
if com._any_not_none(*self.names):
920-
attrs.append(('names', ibase.default_pprint(self.names)))
921-
if self.sortorder is not None:
922-
attrs.append(('sortorder', ibase.default_pprint(self.sortorder)))
921+
attrs = []
922+
attrs.append(('dtype', "'{}'".format(self.dtype)))
923+
if self.names is not None and any(self.names):
924+
attrs.append(('names', default_pprint(self.names)))
925+
max_seq_items = get_option('display.max_seq_items') or len(self)
926+
if len(self) > max_seq_items:
927+
attrs.append(('length', len(self)))
923928
return attrs
924929

925930
def _format_space(self):
926-
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
931+
return " "
927932

928933
def _format_data(self, name=None):
929-
# we are formatting thru the attributes
930-
return None
934+
"""
935+
Return the formatted data as a unicode string
936+
"""
937+
return format_object_summary(self, self._formatter_func,
938+
name=name, is_multi=True)
939+
940+
def __unicode__(self):
941+
"""
942+
Return a string representation for this MultiIndex.
943+
944+
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
945+
py2/py3.
946+
"""
947+
klass = self.__class__.__name__
948+
data = self._format_data()
949+
attrs = self._format_attrs()
950+
space = self._format_space()
951+
952+
prepr = (",%s" % space).join("%s=%s" % (k, v) for k, v in attrs)
953+
954+
# no data provided, just attributes
955+
if data is None:
956+
data = ''
957+
958+
res = "%s(%s%s)" % (klass, data, prepr)
959+
960+
return res
931961

932962
def _format_native_types(self, na_rep='nan', **kwargs):
933963
new_levels = []

pandas/io/formats/printing.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter):
265265

266266

267267
def format_object_summary(obj, formatter, is_justify=True, name=None,
268-
indent_for_name=True):
268+
indent_for_name=True, is_multi=False):
269269
"""
270270
Return the formatted obj as a unicode string
271271
@@ -282,6 +282,8 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
282282
indent_for_name : bool, default True
283283
Whether subsequent lines should be indented to
284284
align with the name.
285+
is_multi : bool, default False
286+
Whether ``obj`` is a :class:`MultiIndex`.
285287
286288
Returns
287289
-------
@@ -306,7 +308,7 @@ def format_object_summary(obj, formatter, is_justify=True, name=None,
306308
space2 = "\n " # space for the opening '['
307309

308310
n = len(obj)
309-
sep = ','
311+
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
310312
max_seq_items = get_option('display.max_seq_items') or n
311313

312314
# are we a truncated display
@@ -334,10 +336,10 @@ def best_len(values):
334336

335337
if n == 0:
336338
summary = '[]{}'.format(close)
337-
elif n == 1:
339+
elif n == 1 and not is_multi:
338340
first = formatter(obj[0])
339341
summary = '[{}]{}'.format(first, close)
340-
elif n == 2:
342+
elif n == 2 and not is_multi:
341343
first = formatter(obj[0])
342344
last = formatter(obj[-1])
343345
summary = '[{}, {}]{}'.format(first, last, close)
@@ -353,15 +355,16 @@ def best_len(values):
353355

354356
# adjust all values to max length if needed
355357
if is_justify:
356-
357-
# however, if we are not truncated and we are only a single
358-
# line, then don't justify
359-
if (is_truncated or
360-
not (len(', '.join(head)) < display_width and
361-
len(', '.join(tail)) < display_width)):
362-
max_len = max(best_len(head), best_len(tail))
363-
head = [x.rjust(max_len) for x in head]
364-
tail = [x.rjust(max_len) for x in tail]
358+
head, tail = _justify(head, tail, display_width, best_len,
359+
is_truncated, is_multi)
360+
if is_multi:
361+
max_space = display_width - len(space2)
362+
item = tail[0]
363+
for i in reversed(range(1, len(item) + 1)):
364+
if len(_pprint_seq(item, max_seq_items=i)) < max_space:
365+
break
366+
head = [_pprint_seq(x, max_seq_items=i) for x in head]
367+
tail = [_pprint_seq(x, max_seq_items=i) for x in tail]
365368

366369
summary = ""
367370
line = space2
@@ -391,7 +394,7 @@ def best_len(values):
391394
close = ']' + close.rstrip(' ')
392395
summary += close
393396

394-
if len(summary) > (display_width):
397+
if len(summary) > (display_width) or is_multi:
395398
summary += space1
396399
else: # one row
397400
summary += ' '
@@ -402,6 +405,52 @@ def best_len(values):
402405
return summary
403406

404407

408+
def _justify(head, tail, display_width, best_len,
409+
is_truncated=False, is_multi=False):
410+
"""
411+
Justify each item in head and tail, so they align properly.
412+
"""
413+
if is_multi:
414+
max_length = _max_level_item_length(head + tail)
415+
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
416+
for seq in head]
417+
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
418+
for seq in tail]
419+
elif (is_truncated or not (len(', '.join(head)) < display_width and
420+
len(', '.join(tail)) < display_width)):
421+
max_length = max(best_len(head), best_len(tail))
422+
head = [x.rjust(max_length) for x in head]
423+
tail = [x.rjust(max_length) for x in tail]
424+
425+
return head, tail
426+
427+
428+
def _max_level_item_length(seq):
429+
"""
430+
For each position across the sequences in ``seq``, find the length of the longest item.
431+
432+
Used for justifying individual values in a :class:`pandas.MultiIndex`.
433+
434+
Parameters
435+
----------
436+
seq : list-like of list-likes of strings
437+
438+
Returns
439+
-------
440+
max_length : list of ints
441+
442+
Examples
443+
--------
444+
>>> _max_level_item_length([['s', 'ab'], ['abc', 'a']])
445+
[3, 2]
446+
"""
447+
max_length = [0] * len(seq[0])
448+
for inner_seq in seq:
449+
length = [len(item) for item in inner_seq]
450+
max_length = [max(x, y) for x, y in zip(max_length, length)]
451+
return max_length
452+
453+
405454
def format_object_attrs(obj):
406455
"""
407456
Return a list of tuples of the (attr, formatted_value)

pandas/tests/indexes/multi/test_format.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,132 @@ def test_repr_max_seq_item_setting(idx):
103103
with pd.option_context("display.max_seq_items", None):
104104
repr(idx)
105105
assert '...' not in str(idx)
106+
107+
108+
class TestRepr(object):
109+
110+
def setup_class(self):
111+
n = 1000
112+
ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
113+
dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
114+
self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
115+
names=['a', 'b', 'dti'])
116+
117+
levels = [ci, ci.codes + 9, dti, dti, dti]
118+
names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
119+
self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names)
120+
121+
def test_repr(self, idx):
122+
result = idx[:1].__repr__()
123+
expected = """MultiIndex([('foo', 'one')],
124+
dtype='object', names=['first', 'second'])"""
125+
assert result == expected
126+
127+
result = idx.__repr__()
128+
expected = """MultiIndex([('foo', 'one'),
129+
('foo', 'two'),
130+
('bar', 'one'),
131+
('baz', 'two'),
132+
('qux', 'one'),
133+
('qux', 'two')],
134+
dtype='object', names=['first', 'second'])"""
135+
assert result == expected
136+
137+
with pd.option_context('display.max_seq_items', 5):
138+
result = idx.__repr__()
139+
expected = """MultiIndex([('foo', 'one'),
140+
('foo', 'two'),
141+
...
142+
('qux', 'one'),
143+
('qux', 'two')],
144+
dtype='object', names=['first', 'second'], length=6)"""
145+
assert result == expected
146+
147+
def test_rjust(self):
148+
result = self.narrow_mi[:1].__repr__()
149+
expected = """\
150+
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
151+
dtype='object', names=['a', 'b', 'dti'])"""
152+
assert result == expected
153+
154+
result = self.narrow_mi[::500].__repr__()
155+
expected = """\
156+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
157+
( 'a', 9, '2000-01-01 00:08:20'),
158+
('abc', 10, '2000-01-01 00:16:40'),
159+
('abc', 10, '2000-01-01 00:25:00')],
160+
dtype='object', names=['a', 'b', 'dti'])"""
161+
assert result == expected
162+
163+
result = self.narrow_mi.__repr__()
164+
expected = """\
165+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
166+
( 'a', 9, '2000-01-01 00:00:01'),
167+
( 'a', 9, '2000-01-01 00:00:02'),
168+
( 'a', 9, '2000-01-01 00:00:03'),
169+
( 'a', 9, '2000-01-01 00:00:04'),
170+
( 'a', 9, '2000-01-01 00:00:05'),
171+
( 'a', 9, '2000-01-01 00:00:06'),
172+
( 'a', 9, '2000-01-01 00:00:07'),
173+
( 'a', 9, '2000-01-01 00:00:08'),
174+
( 'a', 9, '2000-01-01 00:00:09'),
175+
...
176+
('abc', 10, '2000-01-01 00:33:10'),
177+
('abc', 10, '2000-01-01 00:33:11'),
178+
('abc', 10, '2000-01-01 00:33:12'),
179+
('abc', 10, '2000-01-01 00:33:13'),
180+
('abc', 10, '2000-01-01 00:33:14'),
181+
('abc', 10, '2000-01-01 00:33:15'),
182+
('abc', 10, '2000-01-01 00:33:16'),
183+
('abc', 10, '2000-01-01 00:33:17'),
184+
('abc', 10, '2000-01-01 00:33:18'),
185+
('abc', 10, '2000-01-01 00:33:19')],
186+
dtype='object', names=['a', 'b', 'dti'], length=2000)"""
187+
assert result == expected
188+
189+
def test_tuple_width(self):
190+
result = self.wide_mi[:1].__repr__()
191+
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
192+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
193+
assert result == expected
194+
195+
result = self.wide_mi[:10].__repr__()
196+
expected = """\
197+
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
198+
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
199+
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
200+
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
201+
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
202+
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
203+
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
204+
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
205+
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
206+
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
207+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
208+
assert result == expected
209+
210+
result = self.wide_mi.__repr__()
211+
expected = """\
212+
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
213+
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
214+
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
215+
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
216+
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
217+
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
218+
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
219+
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
220+
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
221+
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
222+
...
223+
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
224+
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
225+
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
226+
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
227+
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
228+
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
229+
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
230+
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
231+
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
232+
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
233+
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa
234+
assert result == expected

0 commit comments

Comments
 (0)