From 74d6e29d3251cb34aa64e6bf22be4049ae599fcc Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Fri, 8 Jul 2022 02:29:41 -0500 Subject: [PATCH 1/3] fix GH20868 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/strings/accessor.py | 10 +++++++--- pandas/tests/strings/test_strings.py | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c85a087835b80..6618d2617d8db 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -530,6 +530,7 @@ Conversion Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) +- Bug in :meth:`Series.str.zfill` does not behave the same as ``str.zfill()`` from standard library (:issue:`20868`) - Interval diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index abd380299ba02..2a70ad80fe6d6 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1683,19 +1683,23 @@ def zfill(self, width): Note that ``10`` and ``NaN`` are not strings, therefore they are converted to ``NaN``. The minus sign in ``'-1'`` is treated as a - regular character and the zero is added to the left of it + special character and the zero is added to the right of it (:meth:`str.zfill` would have moved it to the left). ``1000`` remains unchanged as it is longer than `width`. 
>>> s.str.zfill(3) - 0 0-1 + 0 -01 1 001 2 1000 3 NaN 4 NaN dtype: object """ - result = self.pad(width, side="left", fillchar="0") + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + f = lambda x : x.zfill(width) + result = self._data.array._str_map(f) return self._wrap_result(result) def slice(self, start=None, stop=None, step=None): diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index db99ba8368a8a..4a1c10236f9a9 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -799,3 +799,19 @@ def test_str_accessor_in_apply_func(): expected = Series(["A/D", "B/E", "C/F"]) result = df.apply(lambda f: "/".join(f.str.upper()), axis=1) tm.assert_series_equal(result, expected) + + +def test_zfill(): + # https://github.com/pandas-dev/pandas/issues/20868 + value = Series(['-2', '+5']) + wid = "a" + msg = f"width must be of integer type, not {type(wid).__name__}" + with pytest.raises(TypeError, match=msg): + value.str.zfill(wid) + value = Series(['-1', '1', '1000', 10, np.nan]) + expected = Series(['-01', '001', '1000', np.nan, np.nan]) + tm.assert_series_equal(value.str.zfill(3), expected) + + value = Series(['-2', '+5']) + expected = Series(['-0002', '+0005']) + tm.assert_series_equal(value.str.zfill(5), expected) From 448f8d71bf6bf5b3ad9aee0a84382774fbccdff3 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Fri, 8 Jul 2022 14:01:27 -0500 Subject: [PATCH 2/3] fix pre-commit issue --- pandas/core/strings/accessor.py | 2 +- pandas/tests/strings/test_strings.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 2a70ad80fe6d6..d2458c75972d5 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1698,7 +1698,7 @@ def zfill(self, width): if not is_integer(width): msg = f"width must be of 
integer type, not {type(width).__name__}" raise TypeError(msg) - f = lambda x : x.zfill(width) + f = lambda x: x.zfill(width) result = self._data.array._str_map(f) return self._wrap_result(result) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 4a1c10236f9a9..b545bbdda75fe 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -803,15 +803,15 @@ def test_str_accessor_in_apply_func(): def test_zfill(): # https://github.com/pandas-dev/pandas/issues/20868 - value = Series(['-2', '+5']) + value = Series(["-2", "+5"]) wid = "a" msg = f"width must be of integer type, not {type(wid).__name__}" with pytest.raises(TypeError, match=msg): value.str.zfill(wid) - value = Series(['-1', '1', '1000', 10, np.nan]) - expected = Series(['-01', '001', '1000', np.nan, np.nan]) + value = Series(["-1", "1", "1000", 10, np.nan]) + expected = Series(["-01", "001", "1000", np.nan, np.nan]) tm.assert_series_equal(value.str.zfill(3), expected) - value = Series(['-2', '+5']) - expected = Series(['-0002', '+0005']) + value = Series(["-2", "+5"]) + expected = Series(["-0002", "+0005"]) tm.assert_series_equal(value.str.zfill(5), expected) From 60612e5e15307edc5ec4374e02c7917072b761e2 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Sat, 9 Jul 2022 15:29:26 -0500 Subject: [PATCH 3/3] add more tests and describe the differences --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/strings/test_strings.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6618d2617d8db..cbb1d306f9927 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -530,7 +530,7 @@ Conversion Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. 
Now raises ``TypeError`` (:issue:`3485`) -- Bug in :meth:`Series.str.zfill` does not behave the same as ``str.zfill()`` from standard library (:issue:`20868`) +- Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after it, as ``str.zfill`` from the standard library does (:issue:`20868`) - Interval diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index b545bbdda75fe..b55dab8170382 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -803,11 +803,6 @@ def test_str_accessor_in_apply_func(): def test_zfill(): # https://github.com/pandas-dev/pandas/issues/20868 - value = Series(["-2", "+5"]) - wid = "a" - msg = f"width must be of integer type, not {type(wid).__name__}" - with pytest.raises(TypeError, match=msg): - value.str.zfill(wid) value = Series(["-1", "1", "1000", 10, np.nan]) expected = Series(["-01", "001", "1000", np.nan, np.nan]) tm.assert_series_equal(value.str.zfill(3), expected) @@ -815,3 +810,17 @@ def test_zfill(): value = Series(["-2", "+5"]) expected = Series(["-0002", "+0005"]) tm.assert_series_equal(value.str.zfill(5), expected) + + +def test_zfill_with_non_integer_argument(): + value = Series(["-2", "+5"]) + wid = "a" + msg = f"width must be of integer type, not {type(wid).__name__}" + with pytest.raises(TypeError, match=msg): + value.str.zfill(wid) + + +def test_zfill_with_leading_sign(): + value = Series(["-cat", "-1", "+dog"]) + expected = Series(["-0cat", "-0001", "+0dog"]) + tm.assert_series_equal(value.str.zfill(5), expected)