Skip to content

Commit dd7cee6

Browse files
author
Montana Low
committed
Don't raise exceptions when splitting a blank string
1 parent 3783ccc commit dd7cee6

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

pandas/core/strings.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1633,7 +1633,8 @@ def cons_row(x):
16331633
if result:
16341634
# propagate nan values to match longest sequence (GH 18450)
16351635
max_len = max(len(x) for x in result)
1636-
result = [x * max_len if x[0] is np.nan else x for x in result]
1636+
result = [x * max_len if len(x) == 0 or x[0] is np.nan
1637+
else x for x in result]
16371638

16381639
if not isinstance(expand, bool):
16391640
raise ValueError("expand must be True or False")

pandas/tests/test_strings.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1942,6 +1942,13 @@ def test_split(self):
19421942
exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA,
19431943
[u('f'), u('g'), u('h')]])
19441944
tm.assert_series_equal(result, exp)
1945+
1946+
# expand blank split
1947+
values = Series(['a b c', 'a b', '', ' '])
1948+
result = values.str.split(expand=True)
1949+
exp = DataFrame([['a', 'b', 'c'], ['a', 'b', np.nan],
1950+
[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]])
1951+
tm.assert_series_equal(result, exp)
19451952

19461953
def test_rsplit(self):
19471954
values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

0 commit comments

Comments
 (0)