From fe490509e71131ccd06949b11d3b206581045fd1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 25 Oct 2023 10:39:14 -0700 Subject: [PATCH 01/19] CI: Debug timeouts --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 48ef21686a26f..a2f25776129ef 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From 2241aeb9c7da333c20ad0354b6c9ffe7f6f26153 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 25 Oct 2023 18:22:04 -0400 Subject: [PATCH 02/19] Update run_tests.sh --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index a2f25776129ef..a4d56374d1384 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From 
d9c1e20b78658a3a01ac13aad4b0ab762d0a77e0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 25 Oct 2023 19:29:54 -0700 Subject: [PATCH 03/19] See if delay also appears with no xdist --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index a4d56374d1384..2c95a6887480f 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n 0 --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From f780345d7687733f2f64679fa3eb73603f038f02 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 08:52:05 -0700 Subject: [PATCH 04/19] REF: Avoid np.can_cast for scalar inference for NEP 50 --- pandas/core/dtypes/cast.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 27625db766862..d6882fee7aec3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,7 +699,9 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - if not np.can_cast(fill_value, dtype): + try: + np_can_hold_element(dtype, fill_value) + except (LossySetitemError, NotImplementedError): # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) @@ -1751,9 +1753,14 @@ def np_can_hold_element(dtype: 
np.dtype, element: Any) -> Any: if dtype.kind in "iu": if isinstance(element, range): - if _dtype_can_hold_range(element, dtype): + if not len(element): + return True + try: + np_can_hold_element(dtype, element.start) + np_can_hold_element(dtype, element.stop) return element - raise LossySetitemError + except (LossySetitemError, NotImplementedError) as err: + raise LossySetitemError from err if is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1906,14 +1913,3 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError raise NotImplementedError(dtype) - - -def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: - """ - _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), - but in many cases a range can be held by a smaller integer dtype. - Check if this is one of those cases. - """ - if not len(rng): - return True - return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) From 64b03f939c1bed6aebd44c09fb688fbf6299201a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 09:19:04 -0700 Subject: [PATCH 05/19] Use helper function --- pandas/core/dtypes/cast.py | 45 +++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d6882fee7aec3..2b146010b2e12 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,9 +699,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - try: - np_can_hold_element(dtype, fill_value) - except (LossySetitemError, NotImplementedError): + if not np_can_cast_scalar(fill_value, dtype): # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) @@ -1753,14 +1751,9 @@ def 
np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if dtype.kind in "iu": if isinstance(element, range): - if not len(element): - return True - try: - np_can_hold_element(dtype, element.start) - np_can_hold_element(dtype, element.stop) + if _dtype_can_hold_range(element, dtype): return element - except (LossySetitemError, NotImplementedError) as err: - raise LossySetitemError from err + raise LossySetitemError if is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1913,3 +1906,35 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.end, dtype) + + +def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool: + """ + np.can_cast pandas-equivalent for pre 2-0 behavior that allowed scalar + inference + + Parameters + ---------- + element : Scalar + dtype : np.dtype + + Returns + ------- + bool + """ + try: + np_can_hold_element(dtype, element) + return True + except (LossySetitemError, NotImplementedError): + return False From 2b5c4b0a23ae35c026b9844554029f993ee7a5f4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 09:56:59 -0700 Subject: [PATCH 06/19] end->stop --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2b146010b2e12..8191476944d22 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1916,7 +1916,7 @@ def _dtype_can_hold_range(rng: 
range, dtype: np.dtype) -> bool: """ if not len(rng): return True - return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.end, dtype) + return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.stop, dtype) def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool: From c55e46ec5f41f242d94dd9936959ebd84e69f7a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:18:53 -0700 Subject: [PATCH 07/19] ignore typing --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8191476944d22..716d1a78f93c5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,7 +699,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - if not np_can_cast_scalar(fill_value, dtype): + if not np_can_cast_scalar(fill_value, dtype): # type: ignore[arg-type] # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) From 6176b11e830fb72e2b32ea00b55645621dd3c0cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:11:43 -0700 Subject: [PATCH 08/19] Address NEP 50 later --- ci/run_tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 48ef21686a26f..6a70ea1df3e71 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,8 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE +PYTEST_CMD="NPY_PROMOTION_STATE=legacy 
MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From 28494ca7af852708fac4ec71b8030be89421f37b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 27 Oct 2023 18:17:46 -0700 Subject: [PATCH 09/19] Set pyarrow set_io_thread_count --- pandas/tests/io/parser/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 16ee8ab4106ef..c8649d4405b22 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -123,6 +123,7 @@ def all_parsers(request): import pyarrow pyarrow.set_cpu_count(1) + pyarrow.set_io_thread_count(1) return parser From ca4d50487ef4f2a5f99b5e61d84d6cbfa2bc6005 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 28 Oct 2023 09:47:57 -0700 Subject: [PATCH 10/19] Show setup --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index e7f41fd5c6bba..c00aad6fe6656 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -11,7 +11,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" # TODO: Support NEP 50 and remove NPY_PROMOTION_STATE -PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n 0 --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n 0 --setup-show --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From 739511b867520b2632ba87cf66a45813f7052905 Mon Sep 17 
00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 28 Oct 2023 13:07:35 -0700 Subject: [PATCH 11/19] Trigger ci From db81d0404d3304d5aba19be126b237adaf5c4f97 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 29 Oct 2023 10:28:41 -0700 Subject: [PATCH 12/19] Set faulthandler timeout --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 85bb937fe431f..15a16c25391c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -473,7 +473,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "7.3.2" -addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" +addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml faulthandler_timeout=300" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From 4dc96ad49221522fc3b9eca812cdcdd8584b6d50 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:22:15 -0700 Subject: [PATCH 13/19] Use -o --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 15a16c25391c7..b8c0b7fe53324 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -473,7 +473,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "7.3.2" -addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml faulthandler_timeout=300" +addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml -o faulthandler_timeout=300" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From dc4f3cb797b21224570ff704a2452bfbfcc7c972 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke 
<10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:38:16 -0700 Subject: [PATCH 14/19] Don't mess with cpu/io settings --- pandas/tests/io/parser/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index c8649d4405b22..6a5b421a78785 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -120,10 +120,10 @@ def all_parsers(request): pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) # Try finding a way to disable threads all together # for more stable CI runs - import pyarrow + # import pyarrow - pyarrow.set_cpu_count(1) - pyarrow.set_io_thread_count(1) + # pyarrow.set_cpu_count(1) + # pyarrow.set_io_thread_count(1) return parser From 1bfe92dadc290ee65e261498e80880b295309ed7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 08:10:56 -0800 Subject: [PATCH 15/19] Remove faulthandler --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8c44ddbf699d4..26d52d97b0934 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -473,7 +473,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "7.3.2" -addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml -o faulthandler_timeout=300" +addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From dbe2383e889f8b8a93581523ecd63260bd3a3f05 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 09:28:00 -0800 Subject: [PATCH 16/19] use default workers --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh 
b/ci/run_tests.sh index c00aad6fe6656..90dec1a2f3e90 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -11,7 +11,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" # TODO: Support NEP 50 and remove NPY_PROMOTION_STATE -PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v -r fEs -n 0 --setup-show --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -v --setup-show -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From aeab4079d461be2a477cfbea525bbef47c9dc90c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 15:49:43 -0800 Subject: [PATCH 17/19] Skip test_invalid_parse_delimited_date --- pandas/tests/io/parser/test_parse_dates.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2fd389772ca4f..e8a37a2a7c15b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -19,6 +19,7 @@ from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import py_parse_datetime_string +from pandas.compat import is_ci_environment import pandas as pd from pandas import ( @@ -1796,13 +1797,21 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # pandas.errors.ParserError: CSV parse error @pytest.mark.parametrize( "date_string", ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], ) -def test_invalid_parse_delimited_date(all_parsers, date_string): +def test_invalid_parse_delimited_date(all_parsers, date_string, 
request): parser = all_parsers + if parser.engine == "pyarrow": + if is_ci_environment(): + pytest.skip(f"Can hang in CI environment with {parser.engine=}") + else: + mark = pytest.mark.xfail( + reason="CSV parse error: Empty CSV file or block: " + "cannot infer number of columns" + ) + request.applymarker(mark) expected = DataFrame({0: [date_string]}, dtype="object") result = parser.read_csv( StringIO(date_string), From dff4d3747a8a5ff0bf9747b7f114b537d238b54f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 15:50:01 -0800 Subject: [PATCH 18/19] Skip test_invalid_parse_delimited_date --- pandas/tests/io/parser/test_parse_dates.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index e8a37a2a7c15b..2865e00f07135 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1808,8 +1808,10 @@ def test_invalid_parse_delimited_date(all_parsers, date_string, request): pytest.skip(f"Can hang in CI environment with {parser.engine=}") else: mark = pytest.mark.xfail( - reason="CSV parse error: Empty CSV file or block: " - "cannot infer number of columns" + reason=( + "CSV parse error: Empty CSV file or block: " + "cannot infer number of columns" + ) ) request.applymarker(mark) expected = DataFrame({0: [date_string]}, dtype="object") From 656ef9db3b2dc8f5b55913168a94d1e1edb47371 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:33:29 -0800 Subject: [PATCH 19/19] test_index_col_empty_data --- pandas/tests/io/parser/test_index_col.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index b938b129ac38d..95c0096ae864b 100644 --- a/pandas/tests/io/parser/test_index_col.py 
+++ b/pandas/tests/io/parser/test_index_col.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas.compat import is_ci_environment + from pandas import ( DataFrame, Index, @@ -94,7 +96,6 @@ def test_infer_index_col(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block @pytest.mark.parametrize( "index_col,kwargs", [ @@ -134,9 +135,20 @@ def test_infer_index_col(all_parsers): ), ], ) -def test_index_col_empty_data(all_parsers, index_col, kwargs): +def test_index_col_empty_data(all_parsers, index_col, kwargs, request): data = "x,y,z" parser = all_parsers + if parser.engine == "pyarrow": + if is_ci_environment(): + pytest.skip(f"Can hang in CI environment with {parser.engine=}") + else: + mark = pytest.mark.xfail( + reason=( + "CSV parse error: Empty CSV file or block: " + "cannot infer number of columns" + ) + ) + request.applymarker(mark) result = parser.read_csv(StringIO(data), index_col=index_col) expected = DataFrame(**kwargs)