Skip to content

Commit 9daad6c

Browse files
committed
BUG: split should respect maxsplit when no pat is given
1 parent 2c7be34 commit 9daad6c

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

pandas/core/strings.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,11 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
163163
na : default NaN, fill value for missing values.
164164
regex : bool, default True
165165
If True use re.search, otherwise use Python in operator
166-
166+
167167
Returns
168168
-------
169169
Series of boolean values
170-
170+
171171
See Also
172172
--------
173173
match : analogous, but stricter, relying on re.match instead of re.search
@@ -345,7 +345,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False):
345345
346346
See Also
347347
--------
348-
contains : analogous, but less strict, relying on re.search instead of
348+
contains : analogous, but less strict, relying on re.search instead of
349349
re.match
350350
extract : now preferred to the deprecated usage of match (as_indexer=False)
351351
@@ -413,23 +413,23 @@ def str_extract(arr, pat, flags=0):
413413
dtype: object
414414
415415
A pattern with more than one group will return a DataFrame.
416-
416+
417417
>>> Series(['a1', 'b2', 'c3']).str.extract('([ab])(\d)')
418418
0 1
419419
0 a 1
420420
1 b 2
421421
2 NaN NaN
422422
423423
A pattern may contain optional groups.
424-
424+
425425
>>> Series(['a1', 'b2', 'c3']).str.extract('([ab])?(\d)')
426426
0 1
427427
0 a 1
428428
1 b 2
429429
2 NaN 3
430430
431431
Named groups will become column names in the result.
432-
432+
433433
>>> Series(['a1', 'b2', 'c3']).str.extract('(?P<letter>[ab])(?P<digit>\d)')
434434
letter digit
435435
0 a 1
@@ -451,14 +451,14 @@ def f(x):
451451
else:
452452
return empty_row
453453
if regex.groups == 1:
454-
result = Series([f(val)[0] for val in arr],
454+
result = Series([f(val)[0] for val in arr],
455455
name=regex.groupindex.get(1),
456456
index=arr.index)
457457
else:
458458
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
459459
columns = [names.get(1 + i, i) for i in range(regex.groups)]
460-
result = DataFrame([f(val) for val in arr],
461-
columns=columns,
460+
result = DataFrame([f(val) for val in arr],
461+
columns=columns,
462462
index=arr.index)
463463
return result
464464

@@ -617,7 +617,7 @@ def str_split(arr, pat=None, n=None):
617617
if pat is None:
618618
if n is None or n == 0:
619619
n = -1
620-
f = lambda x: x.split()
620+
f = lambda x: x.split(pat, n)
621621
else:
622622
if len(pat) == 1:
623623
if n is None or n == 0:

pandas/tests/test_strings.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,6 @@ def check_index(index):
565565
tm.makeDateIndex, tm.makePeriodIndex ]:
566566
check_index(index())
567567

568-
569568
def test_get_dummies(self):
570569
s = Series(['a|b', 'a|c', np.nan])
571570
result = s.str.get_dummies('|')
@@ -796,6 +795,12 @@ def test_split_maxsplit(self):
796795
result = s.str.split('asdf', n=-1)
797796
tm.assert_series_equal(result, xp)
798797

798+
def test_split_no_pat_with_nonzero_n(self):
799+
s = Series(['split once', 'split once too!'])
800+
result = s.str.split(n=1)
801+
expected = Series({0: ['split', 'once'], 1: ['split', 'once too!']})
802+
tm.assert_series_equal(expected, result)
803+
799804
def test_pipe_failures(self):
800805
# #2119
801806
s = Series(['A|B|C'])
@@ -1092,6 +1097,7 @@ def test_encode_decode_errors(self):
10921097

10931098
tm.assert_series_equal(result, exp)
10941099

1100+
10951101
if __name__ == '__main__':
10961102
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
10971103
exit=False)

0 commit comments

Comments
 (0)