Skip to content

Commit 2599741

Browse files
author
y-p
committed
ENH: to_string() and to_str_columns() should return unicode, deprecate force_unicode #2225
Using pprint_thing will try to decode using utf-8 as a fallback, but these functions will now return unicode() rather than str() objects.
1 parent 007622d commit 2599741

File tree

3 files changed

+28
-39
lines changed

3 files changed

+28
-39
lines changed

pandas/core/format.py

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,7 @@ def to_string(self):
135135
if footer:
136136
result.append(footer)
137137

138-
if py3compat.PY3:
139-
return unicode(u'\n'.join(result))
140-
return com.console_encode(u'\n'.join(result))
138+
return unicode(u'\n'.join(result))
141139

142140
if py3compat.PY3: # pragma: no cover
143141
_encode_diff = lambda x: 0
@@ -200,10 +198,15 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
200198
else:
201199
self.columns = frame.columns
202200

203-
def _to_str_columns(self, force_unicode=False):
201+
def _to_str_columns(self, force_unicode=None):
204202
"""
205203
Render a DataFrame to a list of columns (as lists of strings).
206204
"""
205+
import warnings
206+
if force_unicode is not None: # pragma: no cover
207+
warnings.warn("force_unicode is deprecated, it will have no effect",
208+
FutureWarning)
209+
207210
# may include levels names also
208211
str_index = self._get_formatted_index()
209212
str_columns = self._get_formatted_column_labels()
@@ -237,32 +240,17 @@ def _to_str_columns(self, force_unicode=False):
237240
if self.index:
238241
strcols.insert(0, str_index)
239242

240-
if not py3compat.PY3:
241-
if force_unicode:
242-
def make_unicode(x):
243-
if isinstance(x, unicode):
244-
return x
245-
return x.decode('utf-8')
246-
strcols = map(lambda col: map(make_unicode, col), strcols)
247-
else:
248-
# Generally everything is plain strings, which has ascii
249-
# encoding. Problem is when there is a char with value over
250-
# 127. Everything then gets converted to unicode.
251-
try:
252-
map(lambda col: map(str, col), strcols)
253-
except UnicodeError:
254-
def make_unicode(x):
255-
if isinstance(x, unicode):
256-
return x
257-
return x.decode('utf-8')
258-
strcols = map(lambda col: map(make_unicode, col), strcols)
259-
260243
return strcols
261244

262-
def to_string(self, force_unicode=False):
245+
def to_string(self, force_unicode=None):
263246
"""
264247
Render a DataFrame to a console-friendly tabular output.
265248
"""
249+
import warnings
250+
if force_unicode is not None: # pragma: no cover
251+
warnings.warn("force_unicode is deprecated, it will have no effect",
252+
FutureWarning)
253+
266254
frame = self.frame
267255

268256
if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -272,15 +260,20 @@ def to_string(self, force_unicode=False):
272260
com.pprint_thing(frame.index)))
273261
text = info_line
274262
else:
275-
strcols = self._to_str_columns(force_unicode)
263+
strcols = self._to_str_columns()
276264
text = adjoin(1, *strcols)
277265

278266
self.buf.writelines(text)
279267

280-
def to_latex(self, force_unicode=False, column_format=None):
268+
def to_latex(self, force_unicode=None, column_format=None):
281269
"""
282270
Render a DataFrame to a LaTeX tabular environment output.
283271
"""
272+
import warnings
273+
if force_unicode is not None: # pragma: no cover
274+
warnings.warn("force_unicode is deprecated, it will have no effect",
275+
FutureWarning)
276+
284277
frame = self.frame
285278

286279
if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -289,7 +282,7 @@ def to_latex(self, force_unicode=False, column_format=None):
289282
frame.columns, frame.index))
290283
strcols = [[info_line]]
291284
else:
292-
strcols = self._to_str_columns(force_unicode)
285+
strcols = self._to_str_columns()
293286

294287
if column_format is None:
295288
column_format = '|l|%s|' % '|'.join('c' for _ in strcols)

pandas/core/frame.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,19 +1379,21 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
13791379
def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
13801380
header=True, index=True, na_rep='NaN', formatters=None,
13811381
float_format=None, sparsify=None, nanRep=None,
1382-
index_names=True, justify=None, force_unicode=False):
1382+
index_names=True, justify=None, force_unicode=None):
13831383
"""
13841384
Render a DataFrame to a console-friendly tabular output.
13851385
"""
1386+
import warnings
1387+
if force_unicode is not None: # pragma: no cover
1388+
warnings.warn("force_unicode is deprecated, it will have no effect",
1389+
FutureWarning)
13861390

13871391
if nanRep is not None: # pragma: no cover
1388-
import warnings
13891392
warnings.warn("nanRep is deprecated, use na_rep",
13901393
FutureWarning)
13911394
na_rep = nanRep
13921395

13931396
if colSpace is not None: # pragma: no cover
1394-
import warnings
13951397
warnings.warn("colSpace is deprecated, use col_space",
13961398
FutureWarning)
13971399
col_space = colSpace
@@ -1404,15 +1406,10 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
14041406
justify=justify,
14051407
index_names=index_names,
14061408
header=header, index=index)
1407-
formatter.to_string(force_unicode=force_unicode)
1409+
formatter.to_string()
14081410

14091411
if buf is None:
14101412
result = formatter.buf.getvalue()
1411-
if not force_unicode:
1412-
try:
1413-
result = str(result)
1414-
except ValueError:
1415-
pass
14161413
return result
14171414

14181415
@Appender(fmt.docstring_to_string, indents=1)

pandas/tests/test_format.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_to_string_unicode_columns(self):
135135
df.info(buf=buf)
136136
buf.getvalue()
137137

138-
result = self.frame.to_string(force_unicode=True)
138+
result = self.frame.to_string()
139139
self.assert_(isinstance(result, unicode))
140140

141141
def test_to_string_unicode_two(self):
@@ -495,7 +495,6 @@ def test_to_string_int_formatting(self):
495495
self.assert_(issubclass(df['x'].dtype.type, np.integer))
496496

497497
output = df.to_string()
498-
self.assert_(isinstance(output, str))
499498
expected = (' x\n'
500499
'0 -15\n'
501500
'1 20\n'

0 commit comments

Comments
 (0)