Skip to content

Commit d817d5c

Browse files
committed
Re-allow bytes in str._validate_
1 parent 3ae999a commit d817d5c

File tree

2 files changed

+34
-33
lines changed

2 files changed

+34
-33
lines changed

pandas/core/strings.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1822,7 +1822,10 @@ def _validate(data):
18221822
'not MultiIndex')
18231823

18241824
# see src/inference.pyx which can contain string values
1825-
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
1825+
allowed_types = ['string', 'unicode', 'mixed', 'mixed-integer']
1826+
if isinstance(data, ABCSeries):
1827+
allowed_types = allowed_types + ['bytes']
1828+
18261829
values = data if isinstance(data, Index) else data.values
18271830
if is_categorical_dtype(data.dtype):
18281831
inf_type = lib.infer_dtype(values.categories)

pandas/tests/test_strings.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3016,35 +3016,35 @@ def test_match_findall_flags(self):
30163016
result = data.str.contains(pat, flags=re.IGNORECASE)
30173017
assert result[0]
30183018

3019-
# def test_encode_decode(self):
3020-
# base = Series([u('a'), u('b'), u('a\xe4')])
3021-
# series = base.str.encode('utf-8')
3022-
#
3023-
# f = lambda x: x.decode('utf-8')
3024-
# result = series.str.decode('utf-8')
3025-
# exp = series.map(f)
3026-
#
3027-
# tm.assert_series_equal(result, exp)
3028-
#
3029-
# def test_encode_decode_errors(self):
3030-
# encodeBase = Series([u('a'), u('b'), u('a\x9d')])
3031-
#
3032-
# pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')
3033-
#
3034-
# f = lambda x: x.encode('cp1252', 'ignore')
3035-
# result = encodeBase.str.encode('cp1252', 'ignore')
3036-
# exp = encodeBase.map(f)
3037-
# tm.assert_series_equal(result, exp)
3038-
#
3039-
# decodeBase = Series([b'a', b'b', b'a\x9d'])
3040-
#
3041-
# pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')
3042-
#
3043-
# f = lambda x: x.decode('cp1252', 'ignore')
3044-
# result = decodeBase.str.decode('cp1252', 'ignore')
3045-
# exp = decodeBase.map(f)
3046-
#
3047-
# tm.assert_series_equal(result, exp)
3019+
def test_encode_decode(self):
3020+
base = Series([u('a'), u('b'), u('a\xe4')])
3021+
series = base.str.encode('utf-8')
3022+
3023+
f = lambda x: x.decode('utf-8')
3024+
result = series.str.decode('utf-8')
3025+
exp = series.map(f)
3026+
3027+
tm.assert_series_equal(result, exp)
3028+
3029+
def test_encode_decode_errors(self):
3030+
encodeBase = Series([u('a'), u('b'), u('a\x9d')])
3031+
3032+
pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')
3033+
3034+
f = lambda x: x.encode('cp1252', 'ignore')
3035+
result = encodeBase.str.encode('cp1252', 'ignore')
3036+
exp = encodeBase.map(f)
3037+
tm.assert_series_equal(result, exp)
3038+
3039+
decodeBase = Series([b'a', b'b', b'a\x9d'])
3040+
3041+
pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')
3042+
3043+
f = lambda x: x.decode('cp1252', 'ignore')
3044+
result = decodeBase.str.decode('cp1252', 'ignore')
3045+
exp = decodeBase.map(f)
3046+
3047+
tm.assert_series_equal(result, exp)
30483048

30493049
def test_normalize(self):
30503050
values = ['ABC', u'ABC', u'123', np.nan, u'アイエ']
@@ -3130,9 +3130,7 @@ def test_method_on_bytes(self):
31303130
lhs = Series(np.array(list('abc'), 'S1').astype(object))
31313131
rhs = Series(np.array(list('def'), 'S1').astype(object))
31323132
if compat.PY3:
3133-
message = 'Can only use .str accessor with string values'
3134-
with tm.assert_raises_regex(AttributeError, message):
3135-
lhs.str
3133+
pytest.raises(TypeError, lhs.str.cat, rhs, sep=',')
31363134
else:
31373135
result = lhs.str.cat(rhs)
31383136
expected = Series(np.array(

0 commit comments

Comments
 (0)