diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 7036d0e1428fe..4459e66540dac 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -24,17 +24,6 @@ # found at https://bitbucket.org/gutworth/six -def to_str(s): - """ - Convert bytes and non-string into Python 3 str - """ - if isinstance(s, bytes): - s = s.decode('utf-8') - elif not isinstance(s, str): - s = str(s) - return s - - def set_function_name(f, name, cls): """ Bind the name/qualname attributes of the function diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d0f392df70c85..2f66e9ed46aa0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,12 +6,11 @@ from pandas._libs import lib, tslib, tslibs from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT -from pandas.compat import to_str from .common import ( _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8, - ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool, - is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype, + ensure_int16, ensure_int32, ensure_int64, ensure_object, ensure_str, + is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal, is_extension_array_dtype, is_extension_type, is_float, is_float_dtype, @@ -1189,7 +1188,7 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): # to allow numpy to take our string as a scalar value dtype = object if not isna(value): - value = to_str(value) + value = ensure_str(value) subarr = np.empty(length, dtype=dtype) subarr.fill(value) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 52011d53d22cd..ce99d150880c6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1,5 +1,5 @@ """ common type operations """ -from typing import Union +from typing import Any, Union import warnings import numpy as np @@ -69,6 +69,17 @@ def ensure_float(arr): ensure_object = algos.ensure_object +def ensure_str(value: Union[bytes, Any]) -> str: + """ + Ensure that bytes and non-strings get converted into ``str`` objects. + """ + if isinstance(value, bytes): + value = value.decode('utf-8') + elif not isinstance(value, str): + value = str(value) + return value + + def ensure_categorical(arr): """ Ensure that an array-like object is a Categorical (if not already). diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d85a3a1ddeff0..865eab9d71eff 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -15,7 +15,7 @@ from pandas._config import config from pandas._libs import Timestamp, iNaT, properties -from pandas.compat import set_function_name, to_str +from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -24,7 +24,7 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.common import ( - ensure_int64, ensure_object, is_bool, is_bool_dtype, + ensure_int64, ensure_object, ensure_str, is_bool, is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_dict_like, is_extension_array_dtype, is_integer, is_list_like, is_number, is_numeric_dtype, is_object_dtype, is_period_arraylike, @@ -4564,12 +4564,12 @@ def filter(self, items=None, like=None, regex=None, axis=None): **{name: [r for r in items if r in labels]}) elif like: def f(x): - return like in to_str(x) + return like in ensure_str(x) values = labels.map(f) return self.loc(axis=axis)[values] elif regex: def f(x): - return matcher.search(to_str(x)) is not None + return matcher.search(ensure_str(x)) is not None matcher = re.compile(regex) values = labels.map(f) return self.loc(axis=axis)[values] diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 20bed9bff7383..7bafa15bb1979 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -6,10 +6,9 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT -from pandas.compat import to_str from pandas.errors import AbstractMethodError -from pandas.core.dtypes.common import is_period_dtype +from pandas.core.dtypes.common import ensure_str, is_period_dtype from pandas import DataFrame, MultiIndex, Series, isna, to_datetime from pandas.core.reshape.concat import concat @@ -545,8 +544,7 @@ def read(self): if self.lines and self.chunksize: obj = concat(self) elif self.lines: - - data = to_str(self.data) + data = ensure_str(self.data) obj = self._get_object_parser( self._combine_lines(data.split('\n')) ) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bcbdd80865360..2daed630f1c69 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,16 +17,16 @@ import pandas._libs.ops as libops import pandas._libs.parsers as parsers from pandas._libs.tslibs import parsing -import pandas.compat as compat from pandas.errors import ( AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( - ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal, - is_extension_array_dtype, is_float, is_integer, is_integer_dtype, - is_list_like, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype) + ensure_object, ensure_str, is_bool_dtype, is_categorical_dtype, + is_dtype_equal, is_extension_array_dtype, is_float, is_integer, + is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + is_string_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna @@ -1494,7 +1494,7 @@ def extract(r): # If we find unnamed columns all in a single # level, then our header was too long. for n in range(len(columns[0])): - if all(compat.to_str(c[n]) in self.unnamed_cols for c in columns): + if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): raise ParserError( "Passed header=[{header}] are too many rows for this " "multi_index of columns"