From 71a2576d2b14051d912b8b4a069069e5e8c93129 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 15:56:42 -0800
Subject: [PATCH 1/6] CI/TST: Skip another s3 test that can crash GHA worker

---
 pandas/tests/io/parser/test_network.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index d4c3c93a32af0..b135ab310d8c9 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -7,6 +7,7 @@
     StringIO,
 )
 import logging
+import os
 
 import numpy as np
 import pytest
@@ -264,6 +265,12 @@ def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         expected = read_csv(tips_file)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(
+        os.environ.get("PANDAS_CI", "0") == "1",
+        reason="This test can hang in our CI min_versions build "
+        "and leads to '##[error]The runner has "
+        "received a shutdown signal...' in GHA",
+    )
     def test_read_csv_chunked_download(self, s3_resource, caplog, s3so):
         # 8 MB, S3FS uses 5MB chunks
         import s3fs

From 14354afa37da82ab157e9e8727e164a297eea134 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 19:10:02 -0800
Subject: [PATCH 2/6] skip test_user_agent

---
 pandas/tests/io/parser/test_network.py | 2 +-
 pandas/tests/io/test_user_agent.py     | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index b135ab310d8c9..6b08ea4da8f56 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -269,7 +269,7 @@ def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         os.environ.get("PANDAS_CI", "0") == "1",
         reason="This test can hang in our CI min_versions build "
         "and leads to '##[error]The runner has "
-        "received a shutdown signal...' in GHA",
+        "received a shutdown signal...' in GHA. GH: 45651",
     )
     def test_read_csv_chunked_download(self, s3_resource, caplog, s3so):
         # 8 MB, S3FS uses 5MB chunks
diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py
index 78f2365a09d4c..252402f34bdda 100644
--- a/pandas/tests/io/test_user_agent.py
+++ b/pandas/tests/io/test_user_agent.py
@@ -5,6 +5,7 @@
 import http.server
 from io import BytesIO
 import multiprocessing
+import os
 import socket
 import time
 import urllib.error
@@ -17,6 +18,13 @@
 import pandas as pd
 import pandas._testing as tm
 
+pytestmark = pytest.mark.skipif(
+    os.environ.get("PANDAS_CI", "0") == "1",
+    reason="This test can hang in our CI min_versions build "
+    "and leads to '##[error]The runner has "
+    "received a shutdown signal...' in GHA. GH 45651",
+)
+
 
 class BaseUserAgentResponder(http.server.BaseHTTPRequestHandler):
     """

From dfd1f3e657f6c5966479085119451be466da4f40 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 19:17:32 -0800
Subject: [PATCH 3/6] xfail strict=False another flakey test

---
 pandas/tests/base/test_unique.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
index ac21c2f979dd4..53b04e7ed9fd5 100644
--- a/pandas/tests/base/test_unique.py
+++ b/pandas/tests/base/test_unique.py
@@ -105,6 +105,9 @@ def test_nunique_null(null_obj, index_or_series_obj):
 
 
 @pytest.mark.single
+@pytest.mark.xfail(
+    "Flaky in the CI. Remove once CI has a single build: GH 44584", strict=False
+)
 def test_unique_bad_unicode(index_or_series):
     # regression test for #34550
     uval = "\ud83d"  # smiley emoji

From b454a37c09088002b683ae45dff8e21a5d50c94b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 20:03:05 -0800
Subject: [PATCH 4/6] Fix xfail condition

---
 .github/workflows/posix.yml      | 2 +-
 pandas/tests/base/test_unique.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml
index 4beba743209b6..8080a81519d8f 100644
--- a/.github/workflows/posix.yml
+++ b/.github/workflows/posix.yml
@@ -50,7 +50,7 @@ jobs:
       COVERAGE: ${{ !contains(matrix.settings[0], 'pypy') }}
     concurrency:
       # https://github.community/t/concurrecy-not-work-for-push/183068/7
-      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}-${{ matrix.settings[2] }}
       cancel-in-progress: true
 
     services:
diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
index 53b04e7ed9fd5..e399ae45160fc 100644
--- a/pandas/tests/base/test_unique.py
+++ b/pandas/tests/base/test_unique.py
@@ -106,7 +106,7 @@ def test_nunique_null(null_obj, index_or_series_obj):
 
 @pytest.mark.single
 @pytest.mark.xfail(
-    "Flaky in the CI. Remove once CI has a single build: GH 44584", strict=False
+    reason="Flaky in the CI. Remove once CI has a single build: GH 44584", strict=False
 )
 def test_unique_bad_unicode(index_or_series):
     # regression test for #34550

From ee0c2ad4c6d98eb5d441d0bd6ef0aa54d72920ce Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 21:15:37 -0800
Subject: [PATCH 5/6] Fastparquet was fixed too

---
 pandas/tests/io/test_fsspec.py     |  2 --
 pandas/tests/io/test_parquet.py    | 37 +-----------------------------
 pandas/tests/io/test_user_agent.py |  3 ---
 3 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 172065755d4b7..f1040c0bd30f2 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -3,7 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import PY310
 from pandas.compat._optional import VERSIONS
 
 from pandas import (
@@ -182,7 +181,6 @@ def test_arrowparquet_options(fsspectest):
 
 @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) fastparquet
 @td.skip_if_no("fastparquet")
-@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_fastparquet_options(fsspectest):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     df = DataFrame({"a": [0]})
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index a1a39a1cf8881..b4e91ae06e5b6 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -13,10 +13,7 @@
 
 from pandas._config import get_option
 
-from pandas.compat import (
-    PY310,
-    is_platform_windows,
-)
+from pandas.compat import is_platform_windows
 from pandas.compat.pyarrow import (
     pa_version_under2p0,
     pa_version_under5p0,
@@ -265,7 +262,6 @@ def test_options_py(df_compat, pa):
         check_round_trip(df_compat)
 
 
-@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_options_fp(df_compat, fp):
     # use the set option
 
@@ -343,7 +339,6 @@ def test_get_engine_auto_error_message():
         get_engine("auto")
 
 
-@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
     # cross-compat with differing reading/writing engines
 
@@ -410,10 +405,6 @@ def test_error(self, engine):
         self.check_error_on_write(obj, engine, ValueError, msg)
 
     def test_columns_dtypes(self, request, engine):
-        if PY310 and engine == "fastparquet":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="fastparquet failing on 3.10")
-            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
         # unicode
@@ -439,7 +430,6 @@ def test_columns_dtypes_invalid(self, engine):
         ]
         self.check_error_on_write(df, engine, ValueError, msg)
 
-    @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"])
     def test_compression(self, engine, compression, request):
         if compression == "snappy":
             pytest.importorskip("snappy")
@@ -447,19 +437,11 @@ def test_compression(self, engine, compression, request):
         elif compression == "brotli":
             pytest.importorskip("brotli")
 
-        if PY310 and engine == "fastparquet":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="fastparquet failing on 3.10")
-            )
         df = pd.DataFrame({"A": [1, 2, 3]})
         check_round_trip(df, engine, write_kwargs={"compression": compression})
 
     def test_read_columns(self, engine, request):
         # GH18154
-        if PY310 and engine == "fastparquet":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="fastparquet failing on 3.10")
-            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
         expected = pd.DataFrame({"string": list("abc")})
@@ -469,10 +451,6 @@
         )
 
     def test_write_index(self, engine, request):
-        if PY310 and engine == "fastparquet":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="fastparquet failing on 3.10")
-            )
         check_names = engine != "fastparquet"
 
         df = pd.DataFrame({"A": [1, 2, 3]})
@@ -524,10 +502,6 @@ def test_multiindex_with_columns(self, pa):
     def test_write_ignoring_index(self, engine, request):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
-        if PY310 and engine == "fastparquet":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="fastparquet failing on 3.10")
-            )
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]})
 
         write_kwargs = {"compression": None, "index": False}
@@ -1011,7 +985,6 @@ def test_read_parquet_manager(self, pa, using_array_manager):
 
 
 class TestParquetFastParquet(Base):
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_basic(self, fp, df_full):
         df = df_full
 
@@ -1029,7 +1002,6 @@ def test_duplicate_columns(self, fp):
         msg = "Cannot create parquet dataset with duplicate column names"
         self.check_error_on_write(df, fp, ValueError, msg)
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_bool_with_none(self, fp):
         df = pd.DataFrame({"a": [True, None, False]})
         expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
@@ -1049,12 +1021,10 @@ def test_unsupported(self, fp):
         msg = "Can't infer object conversion type"
         self.check_error_on_write(df, fp, ValueError, msg)
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
    def test_categorical(self, fp):
         df = pd.DataFrame({"a": pd.Categorical(list("abc"))})
         check_round_trip(df, fp)
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_filter_row_groups(self, fp):
         d = {"a": list(range(0, 3))}
         df = pd.DataFrame(d)
@@ -1073,7 +1043,6 @@ def test_s3_roundtrip(self, df_compat, s3_resource, fp, s3so):
             write_kwargs={"compression": None, "storage_options": s3so},
         )
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1091,7 +1060,6 @@
         actual_partition_cols = fastparquet.ParquetFile(path, False).cats
         assert len(actual_partition_cols) == 2
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_string(self, fp, df_full):
         # GH #27117
         partition_cols = "bool"
@@ -1109,7 +1077,6 @@
         actual_partition_cols = fastparquet.ParquetFile(path, False).cats
         assert len(actual_partition_cols) == 1
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_on_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1145,7 +1112,6 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
             partition_cols=partition_cols,
         )
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_empty_dataframe(self, fp):
         # GH #27339
         df = pd.DataFrame()
@@ -1153,7 +1119,6 @@ def test_empty_dataframe(self, fp):
         expected.index.name = "index"
         check_round_trip(df, fp, expected=expected)
 
-    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_timezone_aware_index(self, fp, timezone_aware_date_list):
         idx = 5 * [timezone_aware_date_list]
diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py
index 252402f34bdda..a5869e919f478 100644
--- a/pandas/tests/io/test_user_agent.py
+++ b/pandas/tests/io/test_user_agent.py
@@ -12,7 +12,6 @@
 
 import pytest
 
-from pandas.compat import PY310
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -253,7 +252,6 @@ def responder(request):
             # TODO(ArrayManager) fastparquet
             marks=[
                 td.skip_array_manager_not_yet_implemented,
-                pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10"),
             ],
         ),
         (PickleUserAgentResponder, pd.read_pickle, None),
@@ -291,7 +289,6 @@ def test_server_and_default_headers(responder, read_method, parquet_engine):
             # TODO(ArrayManager) fastparquet
             marks=[
                 td.skip_array_manager_not_yet_implemented,
-                pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10"),
             ],
         ),
         (PickleUserAgentResponder, pd.read_pickle, None),

From ead9c157473b8e174ac7529955bc532b9e15de1e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 26 Jan 2022 21:43:14 -0800
Subject: [PATCH 6/6] Accidentally removed too much

---
 pandas/tests/io/test_parquet.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index b4e91ae06e5b6..2eb8738d88b41 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -404,7 +404,7 @@ def test_error(self, engine):
         msg = "to_parquet only supports IO with DataFrames"
         self.check_error_on_write(obj, engine, ValueError, msg)
 
-    def test_columns_dtypes(self, request, engine):
+    def test_columns_dtypes(self, engine):
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
         # unicode
@@ -430,7 +430,8 @@ def test_columns_dtypes_invalid(self, engine):
         ]
         self.check_error_on_write(df, engine, ValueError, msg)
 
-    def test_compression(self, engine, compression, request):
+    @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"])
+    def test_compression(self, engine, compression):
         if compression == "snappy":
             pytest.importorskip("snappy")
 
@@ -441,7 +442,7 @@ def test_compression(self, engine, compression):
         df = pd.DataFrame({"A": [1, 2, 3]})
         check_round_trip(df, engine, write_kwargs={"compression": compression})
 
-    def test_read_columns(self, engine, request):
+    def test_read_columns(self, engine):
         # GH18154
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
@@ -450,7 +451,7 @@
         expected = pd.DataFrame({"string": list("abc")})
         check_round_trip(
             df, engine, expected=expected, read_kwargs={"columns": ["string"]}
         )
 
-    def test_write_index(self, engine, request):
+    def test_write_index(self, engine):
         check_names = engine != "fastparquet"
 
         df = pd.DataFrame({"A": [1, 2, 3]})
@@ -499,7 +500,7 @@ def test_multiindex_with_columns(self, pa):
             df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]]
         )
 
-    def test_write_ignoring_index(self, engine, request):
+    def test_write_ignoring_index(self, engine):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]})
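
For reference, a minimal standalone sketch of the two pytest mechanisms the patches above rely on (illustrative only; the test name below is hypothetical and not part of the pandas suite). Note that the first positional argument of pytest.mark.xfail is a condition, and a string passed there is treated as an expression to evaluate, which is why the bare reason string committed in PATCH 3/6 had to be moved behind the reason= keyword in PATCH 4/6:

import os

import pytest

# Module-level mark, the pattern added to pandas/tests/io/test_user_agent.py
# in PATCH 2/6: every test in this file is skipped when the CI environment
# sets PANDAS_CI=1; locally (variable unset) the tests still run.
pytestmark = pytest.mark.skipif(
    os.environ.get("PANDAS_CI", "0") == "1",
    reason="hangs in CI and can shut down the GHA runner",
)


# Per-test expected failure: the test is reported as xfail rather than
# failing the run, and strict=False means an unexpected pass does not fail
# the build either.
@pytest.mark.xfail(reason="flaky in the CI: GH 44584", strict=False)
def test_flaky_example():  # hypothetical test name
    assert True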