@@ -163,11 +163,11 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
163
163
na : default NaN, fill value for missing values.
164
164
regex : bool, default True
165
165
If True use re.search, otherwise use Python in operator
166
-
166
+
167
167
Returns
168
168
-------
169
169
Series of boolean values
170
-
170
+
171
171
See Also
172
172
--------
173
173
match : analogous, but stricter, relying on re.match instead of re.search
@@ -345,7 +345,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False):
345
345
346
346
See Also
347
347
--------
348
- contains : analagous, but less strict, relying on re.search instead of
348
+ contains : analogous, but less strict, relying on re.search instead of
349
349
re.match
350
350
extract : now preferred to the deprecated usage of match (as_indexer=False)
351
351
@@ -413,23 +413,23 @@ def str_extract(arr, pat, flags=0):
413
413
dtype: object
414
414
415
415
A pattern with more than one group will return a DataFrame.
416
-
416
+
417
417
>>> Series(['a1', 'b2', 'c3']).str.extract('([ab])(\d)')
418
418
0 1
419
419
0 a 1
420
420
1 b 2
421
421
2 NaN NaN
422
422
423
423
A pattern may contain optional groups.
424
-
424
+
425
425
>>> Series(['a1', 'b2', 'c3']).str.extract('([ab])?(\d)')
426
426
0 1
427
427
0 a 1
428
428
1 b 2
429
429
2 NaN 3
430
430
431
431
Named groups will become column names in the result.
432
-
432
+
433
433
>>> Series(['a1', 'b2', 'c3']).str.extract('(?P<letter>[ab])(?P<digit>\d)')
434
434
letter digit
435
435
0 a 1
@@ -451,14 +451,14 @@ def f(x):
451
451
else :
452
452
return empty_row
453
453
if regex .groups == 1 :
454
- result = Series ([f (val )[0 ] for val in arr ],
454
+ result = Series ([f (val )[0 ] for val in arr ],
455
455
name = regex .groupindex .get (1 ),
456
456
index = arr .index )
457
457
else :
458
458
names = dict (zip (regex .groupindex .values (), regex .groupindex .keys ()))
459
459
columns = [names .get (1 + i , i ) for i in range (regex .groups )]
460
- result = DataFrame ([f (val ) for val in arr ],
461
- columns = columns ,
460
+ result = DataFrame ([f (val ) for val in arr ],
461
+ columns = columns ,
462
462
index = arr .index )
463
463
return result
464
464
@@ -617,7 +617,7 @@ def str_split(arr, pat=None, n=None):
617
617
if pat is None :
618
618
if n is None or n == 0 :
619
619
n = - 1
620
- f = lambda x : x .split ()
620
+ f = lambda x : x .split (pat , n )
621
621
else :
622
622
if len (pat ) == 1 :
623
623
if n is None or n == 0 :
0 commit comments