Skip to content

Commit 31d4394

Browse files
committed
Re-allow bytes in str._validate_
1 parent b3337d0 commit 31d4394

File tree

2 files changed

+34
-33
lines changed

2 files changed

+34
-33
lines changed

pandas/core/strings.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1822,7 +1822,10 @@ def _validate(data):
18221822
'not MultiIndex')
18231823

18241824
# see src/inference.pyx which can contain string values
1825-
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
1825+
allowed_types = ['string', 'unicode', 'mixed', 'mixed-integer']
1826+
if isinstance(data, ABCSeries):
1827+
allowed_types = allowed_types + ['bytes']
1828+
18261829
values = data if isinstance(data, Index) else data.values
18271830
if is_categorical_dtype(data.dtype):
18281831
inf_type = lib.infer_dtype(values.categories)

pandas/tests/test_strings.py

Lines changed: 30 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -3006,35 +3006,35 @@ def test_match_findall_flags(self):
30063006
result = data.str.contains(pat, flags=re.IGNORECASE)
30073007
assert result[0]
30083008

3009-
# def test_encode_decode(self):
3010-
# base = Series([u('a'), u('b'), u('a\xe4')])
3011-
# series = base.str.encode('utf-8')
3012-
#
3013-
# f = lambda x: x.decode('utf-8')
3014-
# result = series.str.decode('utf-8')
3015-
# exp = series.map(f)
3016-
#
3017-
# tm.assert_series_equal(result, exp)
3018-
#
3019-
# def test_encode_decode_errors(self):
3020-
# encodeBase = Series([u('a'), u('b'), u('a\x9d')])
3021-
#
3022-
# pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')
3023-
#
3024-
# f = lambda x: x.encode('cp1252', 'ignore')
3025-
# result = encodeBase.str.encode('cp1252', 'ignore')
3026-
# exp = encodeBase.map(f)
3027-
# tm.assert_series_equal(result, exp)
3028-
#
3029-
# decodeBase = Series([b'a', b'b', b'a\x9d'])
3030-
#
3031-
# pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')
3032-
#
3033-
# f = lambda x: x.decode('cp1252', 'ignore')
3034-
# result = decodeBase.str.decode('cp1252', 'ignore')
3035-
# exp = decodeBase.map(f)
3036-
#
3037-
# tm.assert_series_equal(result, exp)
3009+
def test_encode_decode(self):
3010+
base = Series([u('a'), u('b'), u('a\xe4')])
3011+
series = base.str.encode('utf-8')
3012+
3013+
f = lambda x: x.decode('utf-8')
3014+
result = series.str.decode('utf-8')
3015+
exp = series.map(f)
3016+
3017+
tm.assert_series_equal(result, exp)
3018+
3019+
def test_encode_decode_errors(self):
3020+
encodeBase = Series([u('a'), u('b'), u('a\x9d')])
3021+
3022+
pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')
3023+
3024+
f = lambda x: x.encode('cp1252', 'ignore')
3025+
result = encodeBase.str.encode('cp1252', 'ignore')
3026+
exp = encodeBase.map(f)
3027+
tm.assert_series_equal(result, exp)
3028+
3029+
decodeBase = Series([b'a', b'b', b'a\x9d'])
3030+
3031+
pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')
3032+
3033+
f = lambda x: x.decode('cp1252', 'ignore')
3034+
result = decodeBase.str.decode('cp1252', 'ignore')
3035+
exp = decodeBase.map(f)
3036+
3037+
tm.assert_series_equal(result, exp)
30383038

30393039
def test_normalize(self):
30403040
values = ['ABC', u'ABC', u'123', np.nan, u'アイエ']
@@ -3120,9 +3120,7 @@ def test_method_on_bytes(self):
31203120
lhs = Series(np.array(list('abc'), 'S1').astype(object))
31213121
rhs = Series(np.array(list('def'), 'S1').astype(object))
31223122
if compat.PY3:
3123-
message = 'Can only use .str accessor with string values'
3124-
with tm.assert_raises_regex(AttributeError, message):
3125-
lhs.str
3123+
pytest.raises(TypeError, lhs.str.cat, rhs, sep=',')
31263124
else:
31273125
result = lhs.str.cat(rhs)
31283126
expected = Series(np.array(

0 commit comments

Comments
 (0)