From 1426611143acbc4e9e5bdb368d252b51c847888e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 4 Sep 2023 13:10:23 +0200 Subject: [PATCH 1/6] correct def is_all_strings --- pandas/core/dtypes/common.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 9da4eac6a42c8..42e909a6b9856 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1664,9 +1664,12 @@ def is_all_strings(value: ArrayLike) -> bool: dtype = value.dtype if isinstance(dtype, np.dtype): - return dtype == np.dtype("object") and lib.is_string_array( - np.asarray(value), skipna=False - ) + if len(value) == 0: + return dtype == np.dtype("object") + else: + return dtype == np.dtype("object") and lib.is_string_array( + np.asarray(value), skipna=False + ) elif isinstance(dtype, CategoricalDtype): return dtype.categories.inferred_type == "string" return dtype == "string" From 91c8b2a58c998792c0bbabfa525d307ec91077b5 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 7 Sep 2023 12:00:43 +0200 Subject: [PATCH 2/6] add a test, correct whatsnew/v2.2.0.rst --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/tests/dtypes/test_dtypes.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 4f38d420a53b4..85b91ae9885b3 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -169,6 +169,7 @@ Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`) +- Bug in :func:`is_all_strings` while checking object array with no elements is of the string dtype (:issue:`54661`) Categorical ^^^^^^^^^^^ diff --git 
a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 6562074eee634..6b62abc50174f 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1212,3 +1212,8 @@ def test_multi_column_dtype_assignment(): df["b"] = 0 tm.assert_frame_equal(df, expected) + + +def test_empty_object_array_is_string_dtype(): + # GH #54661 + assert is_string_dtype(pd.Index([], dtype="O")) From 96680cbff9aebe94b983ddf48a3097a89ffd02ad Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 12 Sep 2023 00:05:35 +0200 Subject: [PATCH 3/6] move the test and add parametrization, correct v2.2.0.rst --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/tests/dtypes/test_common.py | 25 +++++++++++++++++-------- pandas/tests/dtypes/test_dtypes.py | 5 ----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 85b91ae9885b3..d6ef78d4eca1c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -169,7 +169,7 @@ Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`) -- Bug in :func:`is_all_strings` while checking object array with no elements is of the string dtype (:issue:`54661`) +- Bug in :func:`is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`) Categorical ^^^^^^^^^^^ diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 4507857418e9e..6f6cc5a5ad5d8 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -301,14 +301,23 @@ def test_is_categorical_dtype(): assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) -def 
test_is_string_dtype(): - assert not com.is_string_dtype(int) - assert not com.is_string_dtype(pd.Series([1, 2])) - - assert com.is_string_dtype(str) - assert com.is_string_dtype(object) - assert com.is_string_dtype(np.array(["a", "b"])) - assert com.is_string_dtype(pd.StringDtype()) +@pytest.mark.parametrize( + "dtype, expected", + [ + (int, False), + (pd.Series([1, 2]), False), + (str, True), + (object, True), + (np.array(["a", "b"]), True), + (pd.StringDtype(), True), + (pd.Index([], dtype="O"), True), + ], +) +def test_is_string_dtype(dtype, expected): + # GH#54661 + + result = com.is_string_dtype(dtype) + assert result is expected @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 6b62abc50174f..6562074eee634 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1212,8 +1212,3 @@ def test_multi_column_dtype_assignment(): df["b"] = 0 tm.assert_frame_equal(df, expected) - - -def test_empty_object_array_is_string_dtype(): - # GH #54661 - assert is_string_dtype(pd.Index([], dtype="O")) From db0dcdd9cdac15fedb01947b1590ebc12fb331f9 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 13 Sep 2023 12:55:39 +0200 Subject: [PATCH 4/6] remove the line from v2.2.0.rst --- doc/source/whatsnew/v2.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index d6ef78d4eca1c..4f38d420a53b4 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -169,7 +169,6 @@ Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`) -- Bug in :func:`is_string_dtype` while checking object array with no elements 
is of the string dtype (:issue:`54661`) Categorical ^^^^^^^^^^^ From 46aafed13c63d95d0deb78ecb9216cb8c55ba3ee Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 19 Sep 2023 17:28:34 +0200 Subject: [PATCH 5/6] add the note to v2.1.1.rst --- doc/source/whatsnew/v2.1.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 42af61be26355..4fb7309f8eb4f 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -34,6 +34,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`) +- Fixed bug in :func:`is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`) - Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`) - Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`) From d076edbd3724ca56be877a9b7dd75d425ef0a31a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 26 Sep 2023 15:44:00 +0200 Subject: [PATCH 6/6] correct the whatsnew note and move it to 2.2.0 --- doc/source/whatsnew/v2.1.1.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 4fb7309f8eb4f..42af61be26355 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -34,7 +34,6 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`) -- Fixed bug in :func:`is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`) - Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples 
(:issue:`54948`) - Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 249f08c7e387b..5b8aaf18b0fe8 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -188,6 +188,7 @@ Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`) +- Bug in :func:`pandas.api.types.is_string_dtype` where an object-dtype array with no elements was not recognized as being of the string dtype (:issue:`54661`) Categorical ^^^^^^^^^^^