From ccc6a707e25bb9c7a8fd6ab32c68404e62c9a9e7 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 2 Apr 2021 17:00:36 +0100 Subject: [PATCH 1/2] TST: [ArrowStringArray] more parameterised testing - part 3 --- pandas/tests/io/formats/test_to_csv.py | 11 ++++++++--- pandas/tests/io/test_parquet.py | 4 ++++ pandas/tests/series/methods/test_convert_dtypes.py | 4 ++-- pandas/tests/series/methods/test_replace.py | 4 ++-- pandas/tests/series/test_constructors.py | 8 +++++--- 5 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 5e599818308b8..bbf78a9013731 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -204,12 +204,17 @@ def test_to_csv_na_rep(self): assert df.set_index("a").to_csv(na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected - # GH 29975 - # Make sure full na_rep shows up when a dtype is provided csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) assert expected == csv - csv = pd.Series(["a", pd.NA, "c"], dtype="string").to_csv(na_rep="ZZZZZ") + + def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype): + # GH 29975 + # Make sure full na_rep shows up when a dtype is provided + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv( + na_rep="ZZZZZ" + ) assert expected == csv def test_to_csv_date_format(self): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e74c915bbaf74..21f922eba6dd3 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -812,11 +812,15 @@ def test_write_with_schema(self, pa): def test_additional_extension_arrays(self, pa): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol + + from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 + df = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype="Int64"), "b": pd.Series([1, 2, 3], dtype="UInt32"), "c": pd.Series(["a", None, "c"], dtype="string"), + "d": pd.Series(["a", None, "c"], dtype="arrow_string"), } ) if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index b68c9c9b0e529..8283bcd16dbad 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -212,11 +212,11 @@ def test_convert_dtypes( # Make sure original not changed tm.assert_series_equal(series, copy) - def test_convert_string_dtype(self): + def test_convert_string_dtype(self, nullable_string_dtype): # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns # that are already string dtype df = pd.DataFrame( - {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string" + {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype ) result = df.convert_dtypes() tm.assert_frame_equal(df, result) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 69dd7d083119f..b21a2c54ae615 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -254,9 +254,9 @@ def test_replace2(self): assert (ser[6:10] == -1).all() assert (ser[20:30] == -1).all() - def test_replace_with_dictlike_and_string_dtype(self): + def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype): # GH 32621 - s = pd.Series(["one", "two", np.nan], dtype="string") + s = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) expected = pd.Series(["1", "2", np.nan]) result = s.replace({"one": "1", "two": "2"}) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 75474a29169a7..82961a42e4ff0 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1549,10 +1549,12 @@ def test_constructor_datetime64(self): series = Series(dates) assert np.issubdtype(series.dtype, np.dtype("M8[ns]")) - def test_constructor_datetimelike_scalar_to_string_dtype(self): + def test_constructor_datetimelike_scalar_to_string_dtype( + self, nullable_string_dtype + ): # https://github.com/pandas-dev/pandas/pull/33846 - result = Series("M", index=[1, 2, 3], dtype="string") - expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype="string") + result = Series("M", index=[1, 2, 3], dtype=nullable_string_dtype) + expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype=nullable_string_dtype) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( From f68c7c8084efdbabbc51be4ea5c5a19e4e3e2a07 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 8 Apr 2021 19:51:15 +0100 Subject: [PATCH 2/2] separate, dedicated test for ArrowStringArray --- pandas/tests/io/test_parquet.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 21f922eba6dd3..21ea2bd560060 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -812,15 +812,11 @@ def test_write_with_schema(self, pa): def test_additional_extension_arrays(self, pa): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol - - from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 - df = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype="Int64"), "b": pd.Series([1, 2, 3], dtype="UInt32"), "c": pd.Series(["a", None, "c"], dtype="string"), - "d": pd.Series(["a", None, "c"], dtype="arrow_string"), } ) if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): @@ -840,6 +836,14 @@ def test_additional_extension_arrays(self, pa): expected = df.assign(a=df.a.astype("float64")) check_round_trip(df, pa, expected=expected) + @td.skip_if_no("pyarrow", min_version="1.0.0") + def test_pyarrow_backed_string_array(self, pa): + # test ArrowStringArray supported through the __arrow_array__ protocol + from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 + + df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="arrow_string")}) + check_round_trip(df, pa, expected=df) + @td.skip_if_no("pyarrow", min_version="0.16.0") def test_additional_extension_types(self, pa): # test additional ExtensionArrays that are supported through the