From 9eeb4f428811670c2f5ec8cb057bfa0a93102919 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 5 Jul 2022 13:37:41 -0700 Subject: [PATCH 1/4] ENH/TST: Add TestBaseReshaping tests for ArrowExtensionArray --- pandas/tests/extension/test_arrow.py | 71 ++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 861cc44310751..1dae34de02914 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -563,6 +563,77 @@ def test_fillna_frame(self, data_missing, using_array_manager, request): super().test_fillna_frame(data_missing) +class TestBaseReshaping(base.BaseReshapingTests): + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat(self, data, in_frame, request): + pa_dtype = data.dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_duration(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="GH 47514: _concat_datetime expects axis arg.", + ) + ) + super().test_concat(data, in_frame) + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat_all_na_block(self, data_missing, in_frame, request): + pa_dtype = data_missing.dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_duration(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="GH 47514: _concat_datetime expects axis arg.", + ) + ) + super().test_concat_all_na_block(data_missing, in_frame) + + def test_concat_with_reindex(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="GH 47514: _concat_datetime expects axis arg.", + ) + ) + elif pa.types.is_date(pa_dtype) or ( + pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None + ): + request.node.add_marker( + pytest.mark.xfail( + raises=AttributeError, + reason="GH 34986", + ) + ) + super().test_concat_with_reindex(data) + + def test_merge_on_extension_array(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_date(pa_dtype) or ( + pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None + ): + request.node.add_marker( + pytest.mark.xfail( + raises=AttributeError, + reason="GH 34986", + ) + ) + super().test_merge_on_extension_array(data) + + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") + def test_transpose(self, data): + super().test_transpose(data) + + class TestBaseSetitem(base.BaseSetitemTests): def test_setitem_scalar_series(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) From 86c614941b01f981757f55b936f40561b7d53229 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 5 Jul 2022 15:37:37 -0700 Subject: [PATCH 2/4] Fix failures --- pandas/tests/extension/test_arrow.py | 102 ++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 1dae34de02914..b6eb97b362839 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -596,6 +596,26 @@ def test_concat_all_na_block(self, data_missing, in_frame, request): ) super().test_concat_all_na_block(data_missing, in_frame) + def test_concat_columns(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_concat_columns(data, na_value) + + def test_concat_extension_arrays_copy_false(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_concat_extension_arrays_copy_false(data, na_value) + def test_concat_with_reindex(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_duration(pa_dtype): @@ -616,6 +636,46 @@ def test_concat_with_reindex(self, data, request): ) super().test_concat_with_reindex(data) + def test_align(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align(data, na_value) + + def test_align_frame(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align_frame(data, na_value) + + def test_align_series_frame(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align_series_frame(data, na_value) + + def test_merge(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_merge(data, na_value) + def test_merge_on_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_date(pa_dtype) or ( @@ -629,10 +689,50 @@ def test_merge_on_extension_array(self, data, request): ) super().test_merge_on_extension_array(data) + def test_merge_on_extension_array_duplicates(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + tz = getattr(pa_dtype, "tz", None) + if pa.types.is_date(pa_dtype) or ( + pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None + ): + request.node.add_marker( + pytest.mark.xfail( + raises=AttributeError, + reason="GH 34986", + ) + ) + elif pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_merge_on_extension_array_duplicates(data) + + def test_ravel(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_ravel(data) + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") def test_transpose(self, data): super().test_transpose(data) + def test_transpose_frame(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_transpose_frame(data) + class TestBaseSetitem(base.BaseSetitemTests): def test_setitem_scalar_series(self, data, box_in_series, request): @@ -640,7 +740,7 @@ def test_setitem_scalar_series(self, data, box_in_series, request): if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) super().test_setitem_scalar_series(data, box_in_series) From 90080042fe3c6b02464b3629681c2fbc5b770915 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 5 Jul 2022 17:42:12 -0700 Subject: [PATCH 3/4] Adjust xfails --- pandas/tests/extension/test_arrow.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index b6eb97b362839..fb852ace70524 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -616,7 +616,7 @@ def test_concat_extension_arrays_copy_false(self, data, na_value, request): ) super().test_concat_extension_arrays_copy_false(data, na_value) - def test_concat_with_reindex(self, data, request): + def test_concat_with_reindex(self, data, request, using_array_manager): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_duration(pa_dtype): request.node.add_marker( @@ -630,7 +630,7 @@ def test_concat_with_reindex(self, data, request): ): request.node.add_marker( pytest.mark.xfail( - raises=AttributeError, + raises=AttributeError if not using_array_manager else TypeError, reason="GH 34986", ) ) @@ -691,7 +691,6 @@ def test_merge_on_extension_array(self, data, request): def test_merge_on_extension_array_duplicates(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - tz = getattr(pa_dtype, "tz", None) if pa.types.is_date(pa_dtype) or ( pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None ): @@ -701,12 +700,6 @@ def test_merge_on_extension_array_duplicates(self, data, request): reason="GH 34986", ) ) - elif pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) super().test_merge_on_extension_array_duplicates(data) def test_ravel(self, data, request): From 02151562587c5b2926a36d1a7887f6f68fe597a2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Jul 2022 14:20:45 -0700 Subject: [PATCH 4/4] Add BasePrintingTests --- pandas/tests/extension/test_arrow.py | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fb852ace70524..7e0792a6010a7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -563,6 +563,38 @@ def test_fillna_frame(self, data_missing, using_array_manager, request): super().test_fillna_frame(data_missing) +class TestBasePrinting(base.BasePrintingTests): + def test_series_repr(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_duration(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="GH 47514: _concat_datetime expects axis arg.", + ) + ) + super().test_series_repr(data) + + def test_dataframe_repr(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_duration(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="GH 47514: _concat_datetime expects axis arg.", + ) + ) + super().test_dataframe_repr(data) + + class TestBaseReshaping(base.BaseReshapingTests): @pytest.mark.parametrize("in_frame", [True, False]) def test_concat(self, data, in_frame, request):