Skip to content

BUG: Implement step in slice StringMethod #8843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Bug Fixes
- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`).
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
- The ``slice`` string method now takes ``step`` into account (:issue:`8754`).



Expand Down
7 changes: 4 additions & 3 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,14 +666,15 @@ def str_split(arr, pat=None, n=None, return_type='series'):
return res


def str_slice(arr, start=None, stop=None, step=1):
def str_slice(arr, start=None, stop=None, step=None):
"""
Slice substrings from each element in array

Parameters
----------
start : int or None
stop : int or None
step : int or None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe ``int, default 1`` instead?
Or, actually, the default for ``step`` could simply be ``None``: that behaves the same as 1 and seems more consistent with the ``start`` and ``stop`` defaults?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, that's done


Returns
-------
Expand Down Expand Up @@ -993,8 +994,8 @@ def center(self, width):
return self._wrap_result(result)

@copy(str_slice)
def slice(self, start=None, stop=None, step=1):
result = str_slice(self.series, start, stop)
def slice(self, start=None, stop=None, step=None):
result = str_slice(self.series, start, stop, step)
return self._wrap_result(result)

@copy(str_slice)
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ def test_empty_str_methods(self):
tm.assert_series_equal(empty_str, empty.str.center(42))
tm.assert_series_equal(empty_list, empty.str.split('a'))
tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
tm.assert_series_equal(empty_str, empty.str.slice(step=1))
tm.assert_series_equal(empty_str, empty.str.strip())
tm.assert_series_equal(empty_str, empty.str.lstrip())
tm.assert_series_equal(empty_str, empty.str.rstrip())
Expand Down Expand Up @@ -922,6 +923,17 @@ def test_slice(self):
exp = Series(['foo', 'bar', NA, 'baz'])
tm.assert_series_equal(result, exp)

for start, stop, step in [(0, 3, -1), (None, None, -1),
(3, 10, 2), (3, 0, -1)]:
try:
result = values.str.slice(start, stop, step)
expected = Series([s[start:stop:step] if not isnull(s) else NA for s in
values])
tm.assert_series_equal(result, expected)
except:
print('failed on %s:%s:%s' % (start, stop, step))
raise

# mixed
mixed = Series(['aafootwo', NA, 'aabartwo', True, datetime.today(),
None, 1, 2.])
Expand All @@ -933,6 +945,10 @@ def test_slice(self):
tm.assert_isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

rs = Series(mixed).str.slice(2, 5, -1)
xp = Series(['oof', NA, 'rab', NA, NA,
NA, NA, NA])

# unicode
values = Series([u('aafootwo'), u('aabartwo'), NA,
u('aabazqux')])
Expand All @@ -941,6 +957,10 @@ def test_slice(self):
exp = Series([u('foo'), u('bar'), NA, u('baz')])
tm.assert_series_equal(result, exp)

result = values.str.slice(0, -1, 2)
exp = Series([u('afow'), u('abrw'), NA, u('abzu')])
tm.assert_series_equal(result, exp)

def test_slice_replace(self):
pass

Expand Down Expand Up @@ -1151,6 +1171,10 @@ def test_string_slice_get_syntax(self):
expected = s.str.slice(stop=3)
assert_series_equal(result, expected)

result = s.str[2::-1]
expected = s.str.slice(start=2, step=-1)
assert_series_equal(result, expected)

def test_string_slice_out_of_bounds(self):
s = Series([(1, 2), (1,), (3,4,5)])

Expand Down