Skip to content

Commit b7cc302

Browse files
author
y-p
committed
ENH: py2/py3 support for str(x)/unicode(x) and bytes(x)/str(x) for series,df,panel
- If you put in proper unicode data, you're good.
- If you put in UTF-8 bytestrings, you should still be good (it works wherever rendering is wrapped by pprint_thing; I may have missed a few spots).
- If you put in non-UTF-8 bytestrings with an unknown encoding and expect unicode(x) or str(x) to do the right thing, you're doing it wrong.
1 parent c22da50 commit b7cc302

File tree

3 files changed

+139
-30
lines changed

3 files changed

+139
-30
lines changed

pandas/core/frame.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,20 +612,51 @@ def _need_info_repr_(self):
612612
else:
613613
return False
614614

615-
def __repr__(self):
615+
def __str__(self):
616+
"""
617+
Return a string representation for a particular DataFrame
618+
619+
Invoked by str(df) in both py2/py3.
620+
Yields Bytestring in Py2, Unicode String in py3.
621+
"""
622+
623+
if py3compat.PY3:
624+
return self.__unicode__()
625+
return self.__bytes__()
626+
627+
def __bytes__(self):
628+
"""
629+
Return a string representation for a particular DataFrame
630+
631+
Invoked by bytes(df) in py3 only.
632+
Yields a bytestring in both py2/py3.
633+
"""
634+
return com.console_encode(self.__unicode__())
635+
636+
def __unicode__(self):
616637
"""
617638
Return a string representation for a particular DataFrame
639+
640+
Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
618641
"""
619-
buf = StringIO()
642+
buf = StringIO(u"")
620643
if self._need_info_repr_():
621644
self.info(buf=buf, verbose=self._verbose_info)
622645
else:
623646
self.to_string(buf=buf)
647+
624648
value = buf.getvalue()
649+
assert type(value) == unicode
625650

626-
if py3compat.PY3:
627-
return unicode(value)
628-
return com.console_encode(value)
651+
return value
652+
653+
def __repr__(self):
654+
"""
655+
Return a string representation for a particular DataFrame
656+
657+
Yields Bytestring in Py2, Unicode String in py3.
658+
"""
659+
return str(self)
629660

630661
def _repr_html_(self):
631662
"""

pandas/core/panel.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -386,34 +386,70 @@ def __array_wrap__(self, result):
386386
#----------------------------------------------------------------------
387387
# Magic methods
388388

389-
def __repr__(self):
389+
def __str__(self):
390+
"""
391+
Return a string representation for a particular Panel
392+
393+
Invoked by str(panel) in both py2/py3.
394+
Yields Bytestring in Py2, Unicode String in py3.
395+
"""
396+
397+
if py3compat.PY3:
398+
return self.__unicode__()
399+
return self.__bytes__()
400+
401+
def __bytes__(self):
402+
"""
403+
Return a string representation for a particular Panel
404+
405+
Invoked by bytes(panel) in py3 only.
406+
Yields a bytestring in both py2/py3.
407+
"""
408+
return com.console_encode(self.__unicode__())
409+
410+
def __unicode__(self):
411+
"""
412+
Return a string representation for a particular Panel
413+
414+
Invoked by unicode(panel) in py2 only. Yields a Unicode String in both py2/py3.
415+
"""
416+
390417
class_name = str(self.__class__)
391418

392419
I, N, K = len(self.items), len(self.major_axis), len(self.minor_axis)
393420

394-
dims = 'Dimensions: %d (items) x %d (major) x %d (minor)' % (I, N, K)
421+
dims = u'Dimensions: %d (items) x %d (major) x %d (minor)' % (I, N, K)
395422

396423
if len(self.major_axis) > 0:
397-
major = 'Major axis: %s to %s' % (self.major_axis[0],
424+
major = u'Major axis: %s to %s' % (self.major_axis[0],
398425
self.major_axis[-1])
399426
else:
400-
major = 'Major axis: None'
427+
major = u'Major axis: None'
401428

402429
if len(self.minor_axis) > 0:
403-
minor = 'Minor axis: %s to %s' % (self.minor_axis[0],
404-
self.minor_axis[-1])
430+
minor = u'Minor axis: %s to %s' % (com.pprint_thing(self.minor_axis[0]),
431+
com.pprint_thing(self.minor_axis[-1]))
405432
else:
406-
minor = 'Minor axis: None'
433+
minor = u'Minor axis: None'
407434

408435
if len(self.items) > 0:
409-
items = 'Items: %s to %s' % (self.items[0], self.items[-1])
436+
items = u'Items: %s to %s' % (com.pprint_thing(self.items[0]),
437+
com.pprint_thing(self.items[-1]))
410438
else:
411-
items = 'Items: None'
439+
items = u'Items: None'
412440

413-
output = '%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major, minor)
441+
output = u'%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major, minor)
414442

415443
return output
416444

445+
def __repr__(self):
446+
"""
447+
Return a string representation for a particular Panel
448+
449+
Yields Bytestring in Py2, Unicode String in py3.
450+
"""
451+
return str(self)
452+
417453
def __iter__(self):
418454
return iter(self.items)
419455

pandas/core/series.py

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -858,8 +858,34 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):
858858

859859
return df.reset_index(level=level, drop=drop)
860860

861-
def __repr__(self):
862-
"""Clean string representation of a Series"""
861+
862+
def __str__(self):
863+
"""
864+
Return a string representation for a particular Series
865+
866+
Invoked by str(series) in both py2/py3.
867+
Yields Bytestring in Py2, Unicode String in py3.
868+
"""
869+
870+
if py3compat.PY3:
871+
return self.__unicode__()
872+
return self.__bytes__()
873+
874+
def __bytes__(self):
875+
"""
876+
Return a string representation for a particular Series
877+
878+
Invoked by bytes(series) in py3 only.
879+
Yields a bytestring in both py2/py3.
880+
"""
881+
return com.console_encode(self.__unicode__())
882+
883+
def __unicode__(self):
884+
"""
885+
Return a string representation for a particular Series
886+
887+
Invoked by unicode(series) in py2 only. Yields a Unicode String in both py2/py3.
888+
"""
863889
width, height = get_terminal_size()
864890
max_rows = (height if fmt.print_config.max_rows == 0
865891
else fmt.print_config.max_rows)
@@ -870,13 +896,24 @@ def __repr__(self):
870896
length=len(self) > 50,
871897
name=True)
872898
else:
873-
result = '%s' % ndarray.__repr__(self)
899+
result = com.pprint_thing(self)
874900

875-
if py3compat.PY3:
876-
return unicode(result)
877-
return com.console_encode(result)
901+
assert type(result) == unicode
902+
return result
903+
904+
def __repr__(self):
905+
"""
906+
Return a string representation for a particular Series
907+
908+
Yields Bytestring in Py2, Unicode String in py3.
909+
"""
910+
return str(self)
878911

879912
def _tidy_repr(self, max_vals=20):
913+
"""
914+
915+
Internal function, should always return unicode string
916+
"""
880917
num = max_vals // 2
881918
head = self[:num]._get_repr(print_header=True, length=False,
882919
name=False)
@@ -886,13 +923,11 @@ def _tidy_repr(self, max_vals=20):
886923
result = head + '\n...\n' + tail
887924
result = '%s\n%s' % (result, self._repr_footer())
888925

889-
if py3compat.PY3:
890-
return unicode(result)
891-
return com.console_encode(result)
926+
return unicode(result)
892927

893928
def _repr_footer(self):
894-
namestr = "Name: %s, " % com.pprint_thing(self.name) if self.name is not None else ""
895-
return '%sLength: %d' % (namestr, len(self))
929+
namestr = u"Name: %s, " % com.pprint_thing(self.name) if self.name is not None else ""
930+
return u'%sLength: %d' % (namestr, len(self))
896931

897932
def to_string(self, buf=None, na_rep='NaN', float_format=None,
898933
nanRep=None, length=False, name=False):
@@ -925,20 +960,27 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
925960

926961
the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
927962
length=length, name=name)
963+
964+
assert type(the_repr) == unicode
965+
928966
if buf is None:
929967
return the_repr
930968
else:
931969
print >> buf, the_repr
932970

933971
def _get_repr(self, name=False, print_header=False, length=True,
934972
na_rep='NaN', float_format=None):
973+
"""
974+
975+
Internal function, should always return unicode string
976+
"""
977+
935978
formatter = fmt.SeriesFormatter(self, name=name, header=print_header,
936979
length=length, na_rep=na_rep,
937980
float_format=float_format)
938-
return formatter.to_string()
939-
940-
def __str__(self):
941-
return repr(self)
981+
result = formatter.to_string()
982+
assert type(result) == unicode
983+
return result
942984

943985
def __iter__(self):
944986
if np.issubdtype(self.dtype, np.datetime64):

0 commit comments

Comments
 (0)