From ccc6a707e25bb9c7a8fd6ab32c68404e62c9a9e7 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 2 Apr 2021 17:00:36 +0100
Subject: [PATCH 1/2] TST: [ArrowStringArray] more parameterised testing - part
 3

---
 pandas/tests/io/formats/test_to_csv.py             | 11 ++++++++---
 pandas/tests/io/test_parquet.py                    |  4 ++++
 pandas/tests/series/methods/test_convert_dtypes.py |  4 ++--
 pandas/tests/series/methods/test_replace.py        |  4 ++--
 pandas/tests/series/test_constructors.py           |  8 +++++---
 5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 5e599818308b8..bbf78a9013731 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -204,12 +204,17 @@ def test_to_csv_na_rep(self):
         assert df.set_index("a").to_csv(na_rep="_") == expected
         assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
 
-        # GH 29975
-        # Make sure full na_rep shows up when a dtype is provided
         csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
         expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
         assert expected == csv
-        csv = pd.Series(["a", pd.NA, "c"], dtype="string").to_csv(na_rep="ZZZZZ")
+
+    def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
+        # GH 29975
+        # Make sure full na_rep shows up when a dtype is provided
+        expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
+        csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
+            na_rep="ZZZZZ"
+        )
         assert expected == csv
 
     def test_to_csv_date_format(self):
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index e74c915bbaf74..21f922eba6dd3 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -812,11 +812,15 @@ def test_write_with_schema(self, pa):
     def test_additional_extension_arrays(self, pa):
         # test additional ExtensionArrays that are supported through the
         # __arrow_array__ protocol
+
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
         df = pd.DataFrame(
             {
                 "a": pd.Series([1, 2, 3], dtype="Int64"),
                 "b": pd.Series([1, 2, 3], dtype="UInt32"),
                 "c": pd.Series(["a", None, "c"], dtype="string"),
+                "d": pd.Series(["a", None, "c"], dtype="arrow_string"),
             }
         )
         if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"):
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index b68c9c9b0e529..8283bcd16dbad 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -212,11 +212,11 @@ def test_convert_dtypes(
         # Make sure original not changed
         tm.assert_series_equal(series, copy)
 
-    def test_convert_string_dtype(self):
+    def test_convert_string_dtype(self, nullable_string_dtype):
         # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
         # that are already string dtype
         df = pd.DataFrame(
-            {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string"
+            {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype
         )
         result = df.convert_dtypes()
         tm.assert_frame_equal(df, result)
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 69dd7d083119f..b21a2c54ae615 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -254,9 +254,9 @@ def test_replace2(self):
         assert (ser[6:10] == -1).all()
         assert (ser[20:30] == -1).all()
 
-    def test_replace_with_dictlike_and_string_dtype(self):
+    def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
         # GH 32621
-        s = pd.Series(["one", "two", np.nan], dtype="string")
+        s = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
         expected = pd.Series(["1", "2", np.nan])
         result = s.replace({"one": "1", "two": "2"})
         tm.assert_series_equal(expected, result)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 75474a29169a7..82961a42e4ff0 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1549,10 +1549,12 @@ def test_constructor_datetime64(self):
         series = Series(dates)
         assert np.issubdtype(series.dtype, np.dtype("M8[ns]"))
 
-    def test_constructor_datetimelike_scalar_to_string_dtype(self):
+    def test_constructor_datetimelike_scalar_to_string_dtype(
+        self, nullable_string_dtype
+    ):
         # https://github.com/pandas-dev/pandas/pull/33846
-        result = Series("M", index=[1, 2, 3], dtype="string")
-        expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype="string")
+        result = Series("M", index=[1, 2, 3], dtype=nullable_string_dtype)
+        expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype=nullable_string_dtype)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(

From f68c7c8084efdbabbc51be4ea5c5a19e4e3e2a07 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Thu, 8 Apr 2021 19:51:15 +0100
Subject: [PATCH 2/2] separate, dedicated test for ArrowStringArray

---
 pandas/tests/io/test_parquet.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 21f922eba6dd3..21ea2bd560060 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -812,15 +812,11 @@ def test_write_with_schema(self, pa):
     def test_additional_extension_arrays(self, pa):
         # test additional ExtensionArrays that are supported through the
         # __arrow_array__ protocol
-
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
         df = pd.DataFrame(
             {
                 "a": pd.Series([1, 2, 3], dtype="Int64"),
                 "b": pd.Series([1, 2, 3], dtype="UInt32"),
                 "c": pd.Series(["a", None, "c"], dtype="string"),
-                "d": pd.Series(["a", None, "c"], dtype="arrow_string"),
             }
         )
         if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"):
@@ -840,6 +836,14 @@ def test_additional_extension_arrays(self, pa):
             expected = df.assign(a=df.a.astype("float64"))
         check_round_trip(df, pa, expected=expected)
 
+    @td.skip_if_no("pyarrow", min_version="1.0.0")
+    def test_pyarrow_backed_string_array(self, pa):
+        # test that ArrowStringArray is supported through the __arrow_array__ protocol
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
+        df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="arrow_string")})
+        check_round_trip(df, pa, expected=df)
+
     @td.skip_if_no("pyarrow", min_version="0.16.0")
     def test_additional_extension_types(self, pa):
         # test additional ExtensionArrays that are supported through the