ENH: to_string() and to_str_columns() should return unicode, deprecate force_unicode #2225

y-p · y-p · commit 2599741cbda7 · 2012-11-22T20:48:42.000+02:00
using pprint_thing will try to decode using utf-8 as a fallback,
but these functions will now return unicode() rather than str()
objects.
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -135,9 +135,7 @@ def to_string(self):
         if footer:
             result.append(footer)
 
-        if py3compat.PY3:
-            return unicode(u'\n'.join(result))
-        return com.console_encode(u'\n'.join(result))
+        return unicode(u'\n'.join(result))
 
 if py3compat.PY3:  # pragma: no cover
     _encode_diff = lambda x: 0
@@ -200,10 +198,15 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         else:
             self.columns = frame.columns
 
-    def _to_str_columns(self, force_unicode=False):
+    def _to_str_columns(self, force_unicode=None):
         """
         Render a DataFrame to a list of columns (as lists of strings).
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         # may include levels names also
         str_index = self._get_formatted_index()
         str_columns = self._get_formatted_column_labels()
@@ -237,32 +240,17 @@ def _to_str_columns(self, force_unicode=False):
         if self.index:
             strcols.insert(0, str_index)
 
-        if not py3compat.PY3:
-            if force_unicode:
-                def make_unicode(x):
-                    if isinstance(x, unicode):
-                        return x
-                    return x.decode('utf-8')
-                strcols = map(lambda col: map(make_unicode, col), strcols)
-            else:
-                # Generally everything is plain strings, which has ascii
-                # encoding.  Problem is when there is a char with value over
-                # 127. Everything then gets converted to unicode.
-                try:
-                    map(lambda col: map(str, col), strcols)
-                except UnicodeError:
-                    def make_unicode(x):
-                        if isinstance(x, unicode):
-                            return x
-                        return x.decode('utf-8')
-                    strcols = map(lambda col: map(make_unicode, col), strcols)
-
         return strcols
 
-    def to_string(self, force_unicode=False):
+    def to_string(self, force_unicode=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         frame = self.frame
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -272,15 +260,20 @@ def to_string(self, force_unicode=False):
                             com.pprint_thing(frame.index)))
             text = info_line
         else:
-            strcols = self._to_str_columns(force_unicode)
+            strcols = self._to_str_columns()
             text = adjoin(1, *strcols)
 
         self.buf.writelines(text)
 
-    def to_latex(self, force_unicode=False, column_format=None):
+    def to_latex(self, force_unicode=None, column_format=None):
         """
         Render a DataFrame to a LaTeX tabular environment output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         frame = self.frame
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -289,7 +282,7 @@ def to_latex(self, force_unicode=False, column_format=None):
                             frame.columns, frame.index))
             strcols = [[info_line]]
         else:
-            strcols = self._to_str_columns(force_unicode)
+            strcols = self._to_str_columns()
 
         if column_format is None:
             column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1379,19 +1379,21 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
     def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                   header=True, index=True, na_rep='NaN', formatters=None,
                   float_format=None, sparsify=None, nanRep=None,
-                  index_names=True, justify=None, force_unicode=False):
+                  index_names=True, justify=None, force_unicode=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
 
         if nanRep is not None:  # pragma: no cover
-            import warnings
             warnings.warn("nanRep is deprecated, use na_rep",
                           FutureWarning)
             na_rep = nanRep
 
         if colSpace is not None:  # pragma: no cover
-            import warnings
             warnings.warn("colSpace is deprecated, use col_space",
                           FutureWarning)
             col_space = colSpace
@@ -1404,15 +1406,10 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                                            justify=justify,
                                            index_names=index_names,
                                            header=header, index=index)
-        formatter.to_string(force_unicode=force_unicode)
+        formatter.to_string()
 
         if buf is None:
             result = formatter.buf.getvalue()
-            if not force_unicode:
-                try:
-                    result = str(result)
-                except ValueError:
-                    pass
             return result
 
     @Appender(fmt.docstring_to_string, indents=1)
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -135,7 +135,7 @@ def test_to_string_unicode_columns(self):
         df.info(buf=buf)
         buf.getvalue()
 
-        result = self.frame.to_string(force_unicode=True)
+        result = self.frame.to_string()
         self.assert_(isinstance(result, unicode))
 
     def test_to_string_unicode_two(self):
@@ -495,7 +495,6 @@ def test_to_string_int_formatting(self):
         self.assert_(issubclass(df['x'].dtype.type, np.integer))
 
         output = df.to_string()
-        self.assert_(isinstance(output, str))
         expected = ('    x\n'
                     '0 -15\n'
                     '1  20\n'