From 87e035fec1db27ee88dabe877235a04d10b0b767 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 13 Aug 2023 22:12:13 +0200 Subject: [PATCH 1/2] REF: Replace "pyarrow" string storage checks with variable --- pandas/tests/arrays/string_/__init__.py | 1 + pandas/tests/arrays/string_/test_string.py | 11 ++++++----- pandas/tests/arrays/string_/test_string_arrow.py | 3 ++- pandas/tests/extension/test_string.py | 8 +++++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/tests/arrays/string_/__init__.py b/pandas/tests/arrays/string_/__init__.py index e69de29bb2d1d..1bb07246381b1 100644 --- a/pandas/tests/arrays/string_/__init__.py +++ b/pandas/tests/arrays/string_/__init__.py @@ -0,0 +1 @@ +arrow_string_storage = ("pyarrow",) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index cfd3314eb5944..62c04dbebde94 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -10,6 +10,7 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.tests.arrays.string_ import arrow_string_storage from pandas.util.version import Version @@ -116,7 +117,7 @@ def test_add(dtype): def test_add_2d(dtype, request): - if dtype.storage == "pyarrow": + if dtype.storage in arrow_string_storage: reason = "Failed: DID NOT RAISE " mark = pytest.mark.xfail(raises=None, reason=reason) request.node.add_marker(mark) @@ -145,7 +146,7 @@ def test_add_sequence(dtype): def test_mul(dtype, request): - if dtype.storage == "pyarrow": + if dtype.storage in arrow_string_storage: reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'" mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) request.node.add_marker(mark) @@ -370,7 +371,7 @@ def test_min_max(method, skipna, dtype, request): @pytest.mark.parametrize("method", ["min", "max"]) @pytest.mark.parametrize("box", [pd.Series, pd.array]) def test_min_max_numpy(method, box, dtype, request): - if dtype.storage == "pyarrow" and box is pd.array: + if dtype.storage in arrow_string_storage and box is pd.array: if box is pd.array: reason = "'<=' not supported between instances of 'str' and 'NoneType'" else: @@ -397,7 +398,7 @@ def test_fillna_args(dtype, request): expected = pd.array(["a", "b"], dtype=dtype) tm.assert_extension_array_equal(res, expected) - if dtype.storage == "pyarrow": + if dtype.storage in arrow_string_storage: msg = "Invalid value '1' for dtype string" else: msg = "Cannot set non-string value '1' into a StringArray." @@ -506,7 +507,7 @@ def test_use_inf_as_na(values, expected, dtype): def test_memory_usage(dtype): # GH 33963 - if dtype.storage == "pyarrow": + if dtype.storage in arrow_string_storage: pytest.skip(f"not applicable for {dtype.storage}") series = pd.Series(["a", "b", "c"], dtype=dtype) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 6912d5038ae0d..c645e13ecbd6f 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -13,6 +13,7 @@ StringDtype, ) from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.tests.arrays.string_ import arrow_string_storage skip_if_no_pyarrow = pytest.mark.skipif( pa_version_under7p0, @@ -52,7 +53,7 @@ def test_config_bad_storage_raises(): def test_constructor_not_string_type_raises(array, chunked): import pyarrow as pa - array = pa if array == "pyarrow" else np + array = pa if array in arrow_string_storage else np arr = array.array([1, 2, 3]) if chunked: diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 6597ff84e3ca4..1a99827426383 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -25,6 +25,8 @@ from pandas.core.arrays.string_ import StringDtype from pandas.tests.extension import base +arrow_string_storage = ("pyarrow",) + def split_array(arr): if arr.dtype.storage != "pyarrow": @@ -104,7 +106,7 @@ def test_is_not_string_type(self, dtype): class TestInterface(base.BaseInterfaceTests): def test_view(self, data, request): - if data.dtype.storage == "pyarrow": + if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_view(data) @@ -117,7 +119,7 @@ def test_from_dtype(self, data): class TestReshaping(base.BaseReshapingTests): def test_transpose(self, data, request): - if data.dtype.storage == "pyarrow": + if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_transpose(data) @@ -128,7 +130,7 @@ class TestGetitem(base.BaseGetitemTests): class TestSetitem(base.BaseSetitemTests): def test_setitem_preserves_views(self, data, request): - if data.dtype.storage == "pyarrow": + if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_setitem_preserves_views(data) From 54ce5851d0c0f618ed2be61a6a2dda8d9bbc5207 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 16 Aug 2023 23:31:22 +0200 Subject: [PATCH 2/2] Update --- pandas/conftest.py | 5 +++++ pandas/tests/arrays/string_/__init__.py | 1 - pandas/tests/arrays/string_/test_string.py | 11 +++++------ pandas/tests/arrays/string_/test_string_arrow.py | 3 +-- pandas/tests/extension/test_string.py | 8 +++----- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f756da82157b8..c2ffddf661921 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1996,3 +1996,8 @@ def warsaw(request) -> str: tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo. """ return request.param + + +@pytest.fixture() +def arrow_string_storage(): + return ("pyarrow",) diff --git a/pandas/tests/arrays/string_/__init__.py b/pandas/tests/arrays/string_/__init__.py index 1bb07246381b1..e69de29bb2d1d 100644 --- a/pandas/tests/arrays/string_/__init__.py +++ b/pandas/tests/arrays/string_/__init__.py @@ -1 +0,0 @@ -arrow_string_storage = ("pyarrow",) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 62c04dbebde94..de93e89ecacd5 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -10,7 +10,6 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.string_arrow import ArrowStringArray -from pandas.tests.arrays.string_ import arrow_string_storage from pandas.util.version import Version @@ -116,7 +115,7 @@ def test_add(dtype): tm.assert_series_equal(result, expected) -def test_add_2d(dtype, request): +def test_add_2d(dtype, request, arrow_string_storage): if dtype.storage in arrow_string_storage: reason = "Failed: DID NOT RAISE " mark = pytest.mark.xfail(raises=None, reason=reason) @@ -145,7 +144,7 @@ def test_add_sequence(dtype): tm.assert_extension_array_equal(result, expected) -def test_mul(dtype, request): +def test_mul(dtype, request, arrow_string_storage): if dtype.storage in arrow_string_storage: reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'" mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) @@ -370,7 +369,7 @@ def test_min_max(method, skipna, dtype, request): @pytest.mark.parametrize("method", ["min", "max"]) @pytest.mark.parametrize("box", [pd.Series, pd.array]) -def test_min_max_numpy(method, box, dtype, request): +def test_min_max_numpy(method, box, dtype, request, arrow_string_storage): if dtype.storage in arrow_string_storage and box is pd.array: if box is pd.array: reason = "'<=' not supported between instances of 'str' and 'NoneType'" @@ -385,7 +384,7 @@ def test_min_max_numpy(method, box, dtype, request): assert result == expected -def test_fillna_args(dtype, request): +def test_fillna_args(dtype, request, arrow_string_storage): # GH 37987 arr = pd.array(["a", pd.NA], dtype=dtype) @@ -504,7 +503,7 @@ def test_use_inf_as_na(values, expected, dtype): tm.assert_frame_equal(result, expected) -def test_memory_usage(dtype): +def test_memory_usage(dtype, arrow_string_storage): # GH 33963 if dtype.storage in arrow_string_storage: diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index c645e13ecbd6f..1ab628f186b47 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -13,7 +13,6 @@ StringDtype, ) from pandas.core.arrays.string_arrow import ArrowStringArray -from pandas.tests.arrays.string_ import arrow_string_storage skip_if_no_pyarrow = pytest.mark.skipif( pa_version_under7p0, @@ -50,7 +49,7 @@ def test_config_bad_storage_raises(): @skip_if_no_pyarrow @pytest.mark.parametrize("chunked", [True, False]) @pytest.mark.parametrize("array", ["numpy", "pyarrow"]) -def test_constructor_not_string_type_raises(array, chunked): +def test_constructor_not_string_type_raises(array, chunked, arrow_string_storage): import pyarrow as pa array = pa if array in arrow_string_storage else np diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 1a99827426383..4e142eb6e14b8 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -25,8 +25,6 @@ from pandas.core.arrays.string_ import StringDtype from pandas.tests.extension import base -arrow_string_storage = ("pyarrow",) - def split_array(arr): if arr.dtype.storage != "pyarrow": @@ -105,7 +103,7 @@ def test_is_not_string_type(self, dtype): class TestInterface(base.BaseInterfaceTests): - def test_view(self, data, request): + def test_view(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_view(data) @@ -118,7 +116,7 @@ def test_from_dtype(self, data): class TestReshaping(base.BaseReshapingTests): - def test_transpose(self, data, request): + def test_transpose(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_transpose(data) @@ -129,7 +127,7 @@ class TestGetitem(base.BaseGetitemTests): class TestSetitem(base.BaseSetitemTests): - def test_setitem_preserves_views(self, data, request): + def test_setitem_preserves_views(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_setitem_preserves_views(data)