From 1dda2c26d147f12ed45844dde47b88b78e04a104 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:53:31 -0700 Subject: [PATCH 1/6] TST: Fix some test builds for numpy 2.0 --- pandas/compat/numpy/__init__.py | 2 +- pandas/core/dtypes/cast.py | 13 +++++-------- pandas/tests/indexes/datetimelike_/test_indexing.py | 2 +- pandas/tests/scalar/timedelta/test_arithmetic.py | 2 +- pandas/tests/tools/test_to_datetime.py | 2 +- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 54a12c76a230b..2fab8f32b8e71 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -12,7 +12,7 @@ np_version_gte1p24 = _nlv >= Version("1.24") np_version_gte1p24p3 = _nlv >= Version("1.24.3") np_version_gte1p25 = _nlv >= Version("1.25") -np_version_gt2 = _nlv >= Version("2.0.0.dev0") +np_version_gt2 = _nlv >= Version("2.0.0") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.23.5" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 662b8c5791e51..f2af69fcc9d84 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -39,7 +39,6 @@ is_supported_dtype, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 -from pandas.compat.numpy import np_version_gt2 from pandas.errors import ( IntCastingNaNError, LossySetitemError, @@ -1643,13 +1642,11 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n with warnings.catch_warnings(): # We already disallow dtype=uint w/ negative numbers # (test_constructor_coercion_signed_to_unsigned) so safe to ignore. - if not np_version_gt2: - warnings.filterwarnings( - "ignore", - "NumPy will stop allowing conversion of " - "out-of-bound Python int", - DeprecationWarning, - ) + warnings.filterwarnings( + "ignore", + "NumPy will stop allowing conversion of " "out-of-bound Python int", + DeprecationWarning, + ) casted = np.asarray(arr, dtype=dtype) else: with warnings.catch_warnings(): diff --git a/pandas/tests/indexes/datetimelike_/test_indexing.py b/pandas/tests/indexes/datetimelike_/test_indexing.py index ee7128601256a..7b2c81aaf17de 100644 --- a/pandas/tests/indexes/datetimelike_/test_indexing.py +++ b/pandas/tests/indexes/datetimelike_/test_indexing.py @@ -19,7 +19,7 @@ @pytest.mark.parametrize("ldtype", dtlike_dtypes) @pytest.mark.parametrize("rdtype", dtlike_dtypes) def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype): - vals = np.tile(3600 * 10**9 * np.arange(3), 2) + vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2) def construct(dtype): if dtype is dtlike_dtypes[-1]: diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index efeca375affbb..150fc8d6fbaa5 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -419,7 +419,7 @@ def test_td_mul_numeric_ndarray(self): def test_td_mul_numeric_ndarray_0d(self): td = Timedelta("1 day") - other = np.array(2) + other = np.array(2, dtype=np.int64) assert other.ndim == 0 expected = Timedelta("2 days") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index cbbd018720bad..b779ef6b17012 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3182,7 +3182,7 @@ def test_invalid_origin(self, unit): ) def test_epoch(self, units, epochs): epoch_1960 = Timestamp(1960, 1, 1) - units_from_epochs = list(range(5)) + units_from_epochs = np.arange(5, dtype=np.int32) expected = Series( [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] ) From 5431ab50ce4c291773453727112e76e90762f864 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jun 2024 14:47:37 -0700 Subject: [PATCH 2/6] 64 not 32 --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b779ef6b17012..85813e6041261 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3182,7 +3182,7 @@ def test_invalid_origin(self, unit): ) def test_epoch(self, units, epochs): epoch_1960 = Timestamp(1960, 1, 1) - units_from_epochs = np.arange(5, dtype=np.int32) + units_from_epochs = np.arange(5, dtype=np.int64) expected = Series( [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] ) From ca28f250be92757fe3053019ff183139bb53fc56 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:37:44 -0700 Subject: [PATCH 3/6] Adjust some tests --- pandas/tests/dtypes/test_inference.py | 3 +-- pandas/tests/indexing/test_coercion.py | 8 ------- pandas/tests/indexing/test_loc.py | 12 +--------- pandas/tests/series/test_constructors.py | 28 +++++------------------- 4 files changed, 8 insertions(+), 43 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index db18cd4aef14e..097ed1ab96d3d 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -34,7 +34,6 @@ missing as libmissing, ops as libops, ) -from pandas.compat.numpy import np_version_gt2 from pandas.core.dtypes import inference from pandas.core.dtypes.cast import find_result_type @@ -1989,7 +1988,7 @@ def test_ensure_int32(): # find a smaller floating dtype (300.0, np.uint16), # for integer floats, we convert them to ints (300.1, np.float64), - (np.int16(300), np.int16 if np_version_gt2 else np.uint16), + (np.int16(300), np.uint16), ], ) def test_find_result_type_uint_int(right, result): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 84cd0d3b08b7b..25ba039f7dafe 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -15,7 +15,6 @@ IS64, is_platform_windows, ) -from pandas.compat.numpy import np_version_gt2 import pandas as pd import pandas._testing as tm @@ -219,8 +218,6 @@ def test_insert_int_index( "insert, coerced_val, coerced_dtype", [ (1, 1.0, None), - # When float_numpy_dtype=float32, this is not the case - # see the correction below (1.1, 1.1, np.float64), (False, False, object), # GH#36319 ("x", "x", object), @@ -232,11 +229,6 @@ def test_insert_float_index( dtype = float_numpy_dtype obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype) coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype - - if np_version_gt2 and dtype == "float32" and coerced_val == 1.1: - # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32 - # the expected dtype will be float32 if the original dtype was float32 - coerced_dtype = np.float32 exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype) self._assert_insert_conversion(obj, insert, exp, coerced_dtype) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 16f3e0fd0c229..f771efe4a0342 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,7 +16,6 @@ from pandas._config import using_pyarrow_string_dtype from pandas._libs import index as libindex -from pandas.compat.numpy import np_version_gt2 from pandas.errors import IndexingError import pandas as pd @@ -2976,16 +2975,7 @@ def test_loc_setitem_uint8_upcast(value): df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8") with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"): df.loc[2, "col1"] = value # value that can't be held in uint8 - - if np_version_gt2 and isinstance(value, np.int16): - # Note, result type of uint8 + int16 is int16 - # in numpy < 2, though, numpy would inspect the - # value and see that it could fit in an uint16, resulting in a uint16 - dtype = "int16" - else: - dtype = "uint16" - - expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype) + expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16") tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 44a7862c21273..d3272a77cd7c6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -769,12 +769,8 @@ def test_constructor_cast(self): def test_constructor_signed_int_overflow_raises(self): # GH#41734 disallow silent overflow, enforced in 2.0 - if np_version_gt2: - msg = "The elements provided in the data cannot all be casted to the dtype" - err = OverflowError - else: - msg = "Values are too large to be losslessly converted" - err = ValueError + msg = "Values are too large to be losslessly converted" + err = ValueError with pytest.raises(err, match=msg): Series([1, 200, 923442], dtype="int8") @@ -802,13 +798,7 @@ def test_constructor_numpy_uints(self, values): def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype): # see gh-15832 - if np_version_gt2: - msg = ( - f"The elements provided in the data cannot " - f"all be casted to the dtype {any_unsigned_int_numpy_dtype}" - ) - else: - msg = "Trying to coerce negative values to unsigned integers" + msg = "Trying to coerce negative values to unsigned integers" with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) @@ -1938,15 +1928,9 @@ def test_constructor_int64_dtype(self, any_int_dtype): def test_constructor_raise_on_lossy_conversion_of_strings(self): # GH#44923 - if not np_version_gt2: - raises = pytest.raises( - ValueError, match="string values cannot be losslessly cast to int8" - ) - else: - raises = pytest.raises( - OverflowError, match="The elements provided in the data" - ) - with raises: + with pytest.raises( + ValueError, match="string values cannot be losslessly cast to int8" + ): Series(["128"], dtype="int8") def test_constructor_dtype_timedelta_alternative_construct(self): From 67f83ff20dd54269c892a25e0165bde291c295b8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 11:11:16 -0700 Subject: [PATCH 4/6] Revert "Adjust some tests" This reverts commit ca28f250be92757fe3053019ff183139bb53fc56. --- pandas/tests/dtypes/test_inference.py | 3 ++- pandas/tests/indexing/test_coercion.py | 8 +++++++ pandas/tests/indexing/test_loc.py | 12 +++++++++- pandas/tests/series/test_constructors.py | 28 +++++++++++++++++++----- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index fbdb8eacd2f9b..b1d7c701e1267 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -34,6 +34,7 @@ missing as libmissing, ops as libops, ) +from pandas.compat.numpy import np_version_gt2 from pandas.core.dtypes import inference from pandas.core.dtypes.cast import find_result_type @@ -1988,7 +1989,7 @@ def test_ensure_int32(): # find a smaller floating dtype (300.0, np.uint16), # for integer floats, we convert them to ints (300.1, np.float64), - (np.int16(300), np.uint16), + (np.int16(300), np.int16 if np_version_gt2 else np.uint16), ], ) def test_find_result_type_uint_int(right, result): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 25ba039f7dafe..84cd0d3b08b7b 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -15,6 +15,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas as pd import pandas._testing as tm @@ -218,6 +219,8 @@ def test_insert_int_index( "insert, coerced_val, coerced_dtype", [ (1, 1.0, None), + # When float_numpy_dtype=float32, this is not the case + # see the correction below (1.1, 1.1, np.float64), (False, False, object), # GH#36319 ("x", "x", object), @@ -229,6 +232,11 @@ def test_insert_float_index( dtype = float_numpy_dtype obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype) coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype + + if np_version_gt2 and dtype == "float32" and coerced_val == 1.1: + # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32 + # the expected dtype will be float32 if the original dtype was float32 + coerced_dtype = np.float32 exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype) self._assert_insert_conversion(obj, insert, exp, coerced_dtype) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f771efe4a0342..16f3e0fd0c229 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,6 +16,7 @@ from pandas._config import using_pyarrow_string_dtype from pandas._libs import index as libindex +from pandas.compat.numpy import np_version_gt2 from pandas.errors import IndexingError import pandas as pd @@ -2975,7 +2976,16 @@ def test_loc_setitem_uint8_upcast(value): df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8") with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"): df.loc[2, "col1"] = value # value that can't be held in uint8 - expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16") + + if np_version_gt2 and isinstance(value, np.int16): + # Note, result type of uint8 + int16 is int16 + # in numpy < 2, though, numpy would inspect the + # value and see that it could fit in an uint16, resulting in a uint16 + dtype = "int16" + else: + dtype = "uint16" + + expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d3272a77cd7c6..44a7862c21273 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -769,8 +769,12 @@ def test_constructor_cast(self): def test_constructor_signed_int_overflow_raises(self): # GH#41734 disallow silent overflow, enforced in 2.0 - msg = "Values are too large to be losslessly converted" - err = ValueError + if np_version_gt2: + msg = "The elements provided in the data cannot all be casted to the dtype" + err = OverflowError + else: + msg = "Values are too large to be losslessly converted" + err = ValueError with pytest.raises(err, match=msg): Series([1, 200, 923442], dtype="int8") @@ -798,7 +802,13 @@ def test_constructor_numpy_uints(self, values): def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype): # see gh-15832 - msg = "Trying to coerce negative values to unsigned integers" + if np_version_gt2: + msg = ( + f"The elements provided in the data cannot " + f"all be casted to the dtype {any_unsigned_int_numpy_dtype}" + ) + else: + msg = "Trying to coerce negative values to unsigned integers" with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) @@ -1928,9 +1938,15 @@ def test_constructor_int64_dtype(self, any_int_dtype): def test_constructor_raise_on_lossy_conversion_of_strings(self): # GH#44923 - with pytest.raises( - ValueError, match="string values cannot be losslessly cast to int8" - ): + if not np_version_gt2: + raises = pytest.raises( + ValueError, match="string values cannot be losslessly cast to int8" + ) + else: + raises = pytest.raises( + OverflowError, match="The elements provided in the data" + ) + with raises: Series(["128"], dtype="int8") def test_constructor_dtype_timedelta_alternative_construct(self): From 90b6e906fa25c890964753d4534acec3df596ee7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:11:18 -0700 Subject: [PATCH 5/6] Just pin numpy in pyarrow nightly build --- ci/deps/actions-311-pyarrownightly.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index d84063ac2a9ba..5455b9b84b034 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -18,7 +18,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz - pip From cd0b63bd6743df3d9e2e1f43118950d4053b9a68 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:08:56 -0700 Subject: [PATCH 6/6] Mark test as pa under 17 --- pandas/compat/__init__.py | 2 ++ pandas/compat/pyarrow.py | 2 ++ pandas/tests/io/test_parquet.py | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 13e6707667d0a..e08da7c7e14e3 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -31,6 +31,7 @@ pa_version_under14p0, pa_version_under14p1, pa_version_under16p0, + pa_version_under17p0, ) if TYPE_CHECKING: @@ -154,6 +155,7 @@ def is_ci_environment() -> bool: "pa_version_under14p0", "pa_version_under14p1", "pa_version_under16p0", + "pa_version_under17p0", "IS64", "ISMUSL", "PY311", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 5a96e5a4cc49a..87d3dc86cee87 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -16,6 +16,7 @@ pa_version_under14p1 = _palv < Version("14.0.1") pa_version_under15p0 = _palv < Version("15.0.0") pa_version_under16p0 = _palv < Version("16.0.0") + pa_version_under17p0 = _palv < Version("17.0.0") except ImportError: pa_version_under10p1 = True pa_version_under11p0 = True @@ -25,3 +26,4 @@ pa_version_under14p1 = True pa_version_under15p0 = True pa_version_under16p0 = True + pa_version_under17p0 = True diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 2e8e358b8e3c9..930df8abea30f 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -14,6 +14,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under15p0, + pa_version_under17p0, ) import pandas as pd @@ -1033,7 +1034,9 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): expected=expected, ) - @pytest.mark.xfail(reason="pa.pandas_compat passes 'datetime64' to .astype") + @pytest.mark.xfail( + pa_version_under17p0, reason="pa.pandas_compat passes 'datetime64' to .astype" + ) def test_columns_dtypes_not_invalid(self, pa): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})