pandas-dev · topper-123 · Jun 12, 2019 · Jun 12, 2019
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -24,17 +24,6 @@
 # found at https://bitbucket.org/gutworth/six
 
 
-def to_str(s):
-    """
-    Convert bytes and non-string into Python 3 str
-    """
-    if isinstance(s, bytes):
-        s = s.decode('utf-8')
-    elif not isinstance(s, str):
-        s = str(s)
-    return s
-
-
 def set_function_name(f, name, cls):
     """
     Bind the name/qualname attributes of the function

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -6,12 +6,11 @@
 
 from pandas._libs import lib, tslib, tslibs
 from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT
-from pandas.compat import to_str
 
 from .common import (
     _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8,
-    ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool,
-    is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
+    ensure_int16, ensure_int32, ensure_int64, ensure_object, ensure_str,
+    is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
     is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
     is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal,
     is_extension_array_dtype, is_extension_type, is_float, is_float_dtype,
@@ -1189,7 +1188,7 @@ def construct_1d_arraylike_from_scalar(value, length, dtype):
             # to allow numpy to take our string as a scalar value
             dtype = object
             if not isna(value):
-                value = to_str(value)
+                value = ensure_str(value)
 
         subarr = np.empty(length, dtype=dtype)
         subarr.fill(value)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1,5 +1,5 @@
 """ common type operations """
-from typing import Union
+from typing import Any, Union
 import warnings
 
 import numpy as np
@@ -69,6 +69,17 @@ def ensure_float(arr):
 ensure_object = algos.ensure_object
 
 
+def ensure_str(value: Union[bytes, Any]) -> str:
+    """
+    Return ``value`` as ``str``: decode bytes as UTF-8, ``str()`` the rest.
+    """
+    if isinstance(value, bytes):
+        value = value.decode('utf-8')  # non-UTF-8 bytes: UnicodeDecodeError
+    elif not isinstance(value, str):
+        value = str(value)  # any other non-str object: use its str() form
+    return value  # ``str`` inputs reach here unchanged
+
+
 def ensure_categorical(arr):
     """
     Ensure that an array-like object is a Categorical (if not already).

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -15,7 +15,7 @@
 from pandas._config import config
 
 from pandas._libs import Timestamp, iNaT, properties
-from pandas.compat import set_function_name, to_str
+from pandas.compat import set_function_name
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
@@ -24,7 +24,7 @@
 
 from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
 from pandas.core.dtypes.common import (
-    ensure_int64, ensure_object, is_bool, is_bool_dtype,
+    ensure_int64, ensure_object, ensure_str, is_bool, is_bool_dtype,
     is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
     is_dict_like, is_extension_array_dtype, is_integer, is_list_like,
     is_number, is_numeric_dtype, is_object_dtype, is_period_arraylike,
@@ -4564,12 +4564,12 @@ def filter(self, items=None, like=None, regex=None, axis=None):
                 **{name: [r for r in items if r in labels]})
         elif like:
             def f(x):
-                return like in to_str(x)
+                return like in ensure_str(x)
             values = labels.map(f)
             return self.loc(axis=axis)[values]
         elif regex:
             def f(x):
-                return matcher.search(to_str(x)) is not None
+                return matcher.search(ensure_str(x)) is not None
             matcher = re.compile(regex)
             values = labels.map(f)
             return self.loc(axis=axis)[values]

diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -6,10 +6,9 @@
 
 import pandas._libs.json as json
 from pandas._libs.tslibs import iNaT
-from pandas.compat import to_str
 from pandas.errors import AbstractMethodError
 
-from pandas.core.dtypes.common import is_period_dtype
+from pandas.core.dtypes.common import ensure_str, is_period_dtype
 
 from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
 from pandas.core.reshape.concat import concat
@@ -545,8 +544,7 @@ def read(self):
         if self.lines and self.chunksize:
             obj = concat(self)
         elif self.lines:
-
-            data = to_str(self.data)
+            data = ensure_str(self.data)
             obj = self._get_object_parser(
                 self._combine_lines(data.split('\n'))
             )

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -17,16 +17,16 @@
 import pandas._libs.ops as libops
 import pandas._libs.parsers as parsers
 from pandas._libs.tslibs import parsing
-import pandas.compat as compat
 from pandas.errors import (
     AbstractMethodError, EmptyDataError, ParserError, ParserWarning)
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
-    ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal,
-    is_extension_array_dtype, is_float, is_integer, is_integer_dtype,
-    is_list_like, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
+    ensure_object, ensure_str, is_bool_dtype, is_categorical_dtype,
+    is_dtype_equal, is_extension_array_dtype, is_float, is_integer,
+    is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
+    is_string_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna
 
@@ -1494,7 +1494,7 @@ def extract(r):
         # If we find unnamed columns all in a single
         # level, then our header was too long.
         for n in range(len(columns[0])):
-            if all(compat.to_str(c[n]) in self.unnamed_cols for c in columns):
+            if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
                 raise ParserError(
                     "Passed header=[{header}] are too many rows for this "
                     "multi_index of columns"