From 1dda2c26d147f12ed45844dde47b88b78e04a104 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jun 2024 11:53:31 -0700
Subject: [PATCH 1/6] TST: Fix some test builds for numpy 2.0

---
 pandas/compat/numpy/__init__.py                     |  2 +-
 pandas/core/dtypes/cast.py                          | 13 +++++--------
 pandas/tests/indexes/datetimelike_/test_indexing.py |  2 +-
 pandas/tests/scalar/timedelta/test_arithmetic.py    |  2 +-
 pandas/tests/tools/test_to_datetime.py              |  2 +-
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
index 54a12c76a230b..2fab8f32b8e71 100644
--- a/pandas/compat/numpy/__init__.py
+++ b/pandas/compat/numpy/__init__.py
@@ -12,7 +12,7 @@
 np_version_gte1p24 = _nlv >= Version("1.24")
 np_version_gte1p24p3 = _nlv >= Version("1.24.3")
 np_version_gte1p25 = _nlv >= Version("1.25")
-np_version_gt2 = _nlv >= Version("2.0.0.dev0")
+np_version_gt2 = _nlv >= Version("2.0.0")
 is_numpy_dev = _nlv.dev is not None
 _min_numpy_ver = "1.23.5"
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 662b8c5791e51..f2af69fcc9d84 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -39,7 +39,6 @@
     is_supported_dtype,
 )
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
-from pandas.compat.numpy import np_version_gt2
 from pandas.errors import (
     IntCastingNaNError,
     LossySetitemError,
@@ -1643,13 +1642,11 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
             with warnings.catch_warnings():
                 # We already disallow dtype=uint w/ negative numbers
                 # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
-                if not np_version_gt2:
-                    warnings.filterwarnings(
-                        "ignore",
-                        "NumPy will stop allowing conversion of "
-                        "out-of-bound Python int",
-                        DeprecationWarning,
-                    )
+                warnings.filterwarnings(
+                    "ignore",
+                    "NumPy will stop allowing conversion of " "out-of-bound Python int",
+                    DeprecationWarning,
+                )
                 casted = np.asarray(arr, dtype=dtype)
         else:
             with warnings.catch_warnings():
diff --git a/pandas/tests/indexes/datetimelike_/test_indexing.py b/pandas/tests/indexes/datetimelike_/test_indexing.py
index ee7128601256a..7b2c81aaf17de 100644
--- a/pandas/tests/indexes/datetimelike_/test_indexing.py
+++ b/pandas/tests/indexes/datetimelike_/test_indexing.py
@@ -19,7 +19,7 @@
 @pytest.mark.parametrize("ldtype", dtlike_dtypes)
 @pytest.mark.parametrize("rdtype", dtlike_dtypes)
 def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
-    vals = np.tile(3600 * 10**9 * np.arange(3), 2)
+    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)
 
     def construct(dtype):
         if dtype is dtlike_dtypes[-1]:
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index efeca375affbb..150fc8d6fbaa5 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -419,7 +419,7 @@ def test_td_mul_numeric_ndarray(self):
 
     def test_td_mul_numeric_ndarray_0d(self):
         td = Timedelta("1 day")
-        other = np.array(2)
+        other = np.array(2, dtype=np.int64)
         assert other.ndim == 0
         expected = Timedelta("2 days")
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index cbbd018720bad..b779ef6b17012 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -3182,7 +3182,7 @@ def test_invalid_origin(self, unit):
     )
     def test_epoch(self, units, epochs):
         epoch_1960 = Timestamp(1960, 1, 1)
-        units_from_epochs = list(range(5))
+        units_from_epochs = np.arange(5, dtype=np.int32)
         expected = Series(
             [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
         )

From 5431ab50ce4c291773453727112e76e90762f864 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jun 2024 14:47:37 -0700
Subject: [PATCH 2/6] TST: Use int64, not int32, for units_from_epochs in test_epoch

---
 pandas/tests/tools/test_to_datetime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index b779ef6b17012..85813e6041261 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -3182,7 +3182,7 @@ def test_invalid_origin(self, unit):
     )
     def test_epoch(self, units, epochs):
         epoch_1960 = Timestamp(1960, 1, 1)
-        units_from_epochs = np.arange(5, dtype=np.int32)
+        units_from_epochs = np.arange(5, dtype=np.int64)
         expected = Series(
             [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
         )

From ca28f250be92757fe3053019ff183139bb53fc56 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jun 2024 17:37:44 -0700
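Subject: [PATCH 3/6] Adjust some tests

Remove the np_version_gt2 branches from the dtype inference, index
coercion, loc setitem and Series constructor tests so that they expect
the pre-NumPy-2 dtypes and error messages unconditionally.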

---
 pandas/tests/dtypes/test_inference.py    |  3 +--
 pandas/tests/indexing/test_coercion.py   |  8 -------
 pandas/tests/indexing/test_loc.py        | 12 +---------
 pandas/tests/series/test_constructors.py | 28 +++++-------------------
 4 files changed, 8 insertions(+), 43 deletions(-)

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index db18cd4aef14e..097ed1ab96d3d 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -34,7 +34,6 @@
     missing as libmissing,
     ops as libops,
 )
-from pandas.compat.numpy import np_version_gt2
 
 from pandas.core.dtypes import inference
 from pandas.core.dtypes.cast import find_result_type
@@ -1989,7 +1988,7 @@ def test_ensure_int32():
         # find a smaller floating dtype
         (300.0, np.uint16),  # for integer floats, we convert them to ints
         (300.1, np.float64),
-        (np.int16(300), np.int16 if np_version_gt2 else np.uint16),
+        (np.int16(300), np.uint16),
     ],
 )
 def test_find_result_type_uint_int(right, result):
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 84cd0d3b08b7b..25ba039f7dafe 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -15,7 +15,6 @@
     IS64,
     is_platform_windows,
 )
-from pandas.compat.numpy import np_version_gt2
 
 import pandas as pd
 import pandas._testing as tm
@@ -219,8 +218,6 @@ def test_insert_int_index(
         "insert, coerced_val, coerced_dtype",
         [
             (1, 1.0, None),
-            # When float_numpy_dtype=float32, this is not the case
-            # see the correction below
             (1.1, 1.1, np.float64),
             (False, False, object),  # GH#36319
             ("x", "x", object),
@@ -232,11 +229,6 @@ def test_insert_float_index(
         dtype = float_numpy_dtype
         obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
         coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
-
-        if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
-            # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
-            # the expected dtype will be float32 if the original dtype was float32
-            coerced_dtype = np.float32
         exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
         self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
 
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 16f3e0fd0c229..f771efe4a0342 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -16,7 +16,6 @@
 from pandas._config import using_pyarrow_string_dtype
 
 from pandas._libs import index as libindex
-from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IndexingError
 
 import pandas as pd
@@ -2976,16 +2975,7 @@ def test_loc_setitem_uint8_upcast(value):
     df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
     with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
         df.loc[2, "col1"] = value  # value that can't be held in uint8
-
-    if np_version_gt2 and isinstance(value, np.int16):
-        # Note, result type of uint8 + int16 is int16
-        # in numpy < 2, though, numpy would inspect the
-        # value and see that it could fit in an uint16, resulting in a uint16
-        dtype = "int16"
-    else:
-        dtype = "uint16"
-
-    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
+    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
     tm.assert_frame_equal(df, expected)
 
 
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 44a7862c21273..d3272a77cd7c6 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -769,12 +769,8 @@ def test_constructor_cast(self):
 
     def test_constructor_signed_int_overflow_raises(self):
         # GH#41734 disallow silent overflow, enforced in 2.0
-        if np_version_gt2:
-            msg = "The elements provided in the data cannot all be casted to the dtype"
-            err = OverflowError
-        else:
-            msg = "Values are too large to be losslessly converted"
-            err = ValueError
+        msg = "Values are too large to be losslessly converted"
+        err = ValueError
         with pytest.raises(err, match=msg):
             Series([1, 200, 923442], dtype="int8")
 
@@ -802,13 +798,7 @@ def test_constructor_numpy_uints(self, values):
 
     def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype):
         # see gh-15832
-        if np_version_gt2:
-            msg = (
-                f"The elements provided in the data cannot "
-                f"all be casted to the dtype {any_unsigned_int_numpy_dtype}"
-            )
-        else:
-            msg = "Trying to coerce negative values to unsigned integers"
+        msg = "Trying to coerce negative values to unsigned integers"
         with pytest.raises(OverflowError, match=msg):
             Series([-1], dtype=any_unsigned_int_numpy_dtype)
 
@@ -1938,15 +1928,9 @@ def test_constructor_int64_dtype(self, any_int_dtype):
 
     def test_constructor_raise_on_lossy_conversion_of_strings(self):
         # GH#44923
-        if not np_version_gt2:
-            raises = pytest.raises(
-                ValueError, match="string values cannot be losslessly cast to int8"
-            )
-        else:
-            raises = pytest.raises(
-                OverflowError, match="The elements provided in the data"
-            )
-        with raises:
+        with pytest.raises(
+            ValueError, match="string values cannot be losslessly cast to int8"
+        ):
             Series(["128"], dtype="int8")
 
     def test_constructor_dtype_timedelta_alternative_construct(self):

From 67f83ff20dd54269c892a25e0165bde291c295b8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 24 Jun 2024 11:11:16 -0700
Subject: [PATCH 4/6] Revert "Adjust some tests"

This reverts commit ca28f250be92757fe3053019ff183139bb53fc56.
---
 pandas/tests/dtypes/test_inference.py    |  3 ++-
 pandas/tests/indexing/test_coercion.py   |  8 +++++++
 pandas/tests/indexing/test_loc.py        | 12 +++++++++-
 pandas/tests/series/test_constructors.py | 28 +++++++++++++++++++-----
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index fbdb8eacd2f9b..b1d7c701e1267 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -34,6 +34,7 @@
     missing as libmissing,
     ops as libops,
 )
+from pandas.compat.numpy import np_version_gt2
 
 from pandas.core.dtypes import inference
 from pandas.core.dtypes.cast import find_result_type
@@ -1988,7 +1989,7 @@ def test_ensure_int32():
         # find a smaller floating dtype
         (300.0, np.uint16),  # for integer floats, we convert them to ints
         (300.1, np.float64),
-        (np.int16(300), np.uint16),
+        (np.int16(300), np.int16 if np_version_gt2 else np.uint16),
     ],
 )
 def test_find_result_type_uint_int(right, result):
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 25ba039f7dafe..84cd0d3b08b7b 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -15,6 +15,7 @@
     IS64,
     is_platform_windows,
 )
+from pandas.compat.numpy import np_version_gt2
 
 import pandas as pd
 import pandas._testing as tm
@@ -218,6 +219,8 @@ def test_insert_int_index(
         "insert, coerced_val, coerced_dtype",
         [
             (1, 1.0, None),
+            # When float_numpy_dtype=float32, this is not the case
+            # see the correction below
             (1.1, 1.1, np.float64),
             (False, False, object),  # GH#36319
             ("x", "x", object),
@@ -229,6 +232,11 @@ def test_insert_float_index(
         dtype = float_numpy_dtype
         obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
         coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
+
+        if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
+            # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
+            # the expected dtype will be float32 if the original dtype was float32
+            coerced_dtype = np.float32
         exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
         self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
 
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index f771efe4a0342..16f3e0fd0c229 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -16,6 +16,7 @@
 from pandas._config import using_pyarrow_string_dtype
 
 from pandas._libs import index as libindex
+from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IndexingError
 
 import pandas as pd
@@ -2975,7 +2976,16 @@ def test_loc_setitem_uint8_upcast(value):
     df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
     with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
         df.loc[2, "col1"] = value  # value that can't be held in uint8
-    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
+
+    if np_version_gt2 and isinstance(value, np.int16):
+        # Note, result type of uint8 + int16 is int16
+        # in numpy < 2, though, numpy would inspect the
+        # value and see that it could fit in an uint16, resulting in a uint16
+        dtype = "int16"
+    else:
+        dtype = "uint16"
+
+    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
     tm.assert_frame_equal(df, expected)
 
 
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index d3272a77cd7c6..44a7862c21273 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -769,8 +769,12 @@ def test_constructor_cast(self):
 
     def test_constructor_signed_int_overflow_raises(self):
         # GH#41734 disallow silent overflow, enforced in 2.0
-        msg = "Values are too large to be losslessly converted"
-        err = ValueError
+        if np_version_gt2:
+            msg = "The elements provided in the data cannot all be casted to the dtype"
+            err = OverflowError
+        else:
+            msg = "Values are too large to be losslessly converted"
+            err = ValueError
         with pytest.raises(err, match=msg):
             Series([1, 200, 923442], dtype="int8")
 
@@ -798,7 +802,13 @@ def test_constructor_numpy_uints(self, values):
 
     def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype):
         # see gh-15832
-        msg = "Trying to coerce negative values to unsigned integers"
+        if np_version_gt2:
+            msg = (
+                f"The elements provided in the data cannot "
+                f"all be casted to the dtype {any_unsigned_int_numpy_dtype}"
+            )
+        else:
+            msg = "Trying to coerce negative values to unsigned integers"
         with pytest.raises(OverflowError, match=msg):
             Series([-1], dtype=any_unsigned_int_numpy_dtype)
 
@@ -1928,9 +1938,15 @@ def test_constructor_int64_dtype(self, any_int_dtype):
 
     def test_constructor_raise_on_lossy_conversion_of_strings(self):
         # GH#44923
-        with pytest.raises(
-            ValueError, match="string values cannot be losslessly cast to int8"
-        ):
+        if not np_version_gt2:
+            raises = pytest.raises(
+                ValueError, match="string values cannot be losslessly cast to int8"
+            )
+        else:
+            raises = pytest.raises(
+                OverflowError, match="The elements provided in the data"
+            )
+        with raises:
             Series(["128"], dtype="int8")
 
     def test_constructor_dtype_timedelta_alternative_construct(self):

From 90b6e906fa25c890964753d4534acec3df596ee7 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 24 Jun 2024 12:11:18 -0700
Subject: [PATCH 5/6] Just pin numpy in pyarrow nightly build

---
 ci/deps/actions-311-pyarrownightly.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml
index d84063ac2a9ba..5455b9b84b034 100644
--- a/ci/deps/actions-311-pyarrownightly.yaml
+++ b/ci/deps/actions-311-pyarrownightly.yaml
@@ -18,7 +18,7 @@ dependencies:
 
   # required dependencies
   - python-dateutil
-  - numpy
+  - numpy<2
   - pytz
   - pip
 

From cd0b63bd6743df3d9e2e1f43118950d4053b9a68 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 24 Jun 2024 13:08:56 -0700
Subject: [PATCH 6/6] TST: xfail test_columns_dtypes_not_invalid only for pyarrow < 17

---
 pandas/compat/__init__.py       | 2 ++
 pandas/compat/pyarrow.py        | 2 ++
 pandas/tests/io/test_parquet.py | 5 ++++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 13e6707667d0a..e08da7c7e14e3 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -31,6 +31,7 @@
     pa_version_under14p0,
     pa_version_under14p1,
     pa_version_under16p0,
+    pa_version_under17p0,
 )
 
 if TYPE_CHECKING:
@@ -154,6 +155,7 @@ def is_ci_environment() -> bool:
     "pa_version_under14p0",
     "pa_version_under14p1",
     "pa_version_under16p0",
+    "pa_version_under17p0",
     "IS64",
     "ISMUSL",
     "PY311",
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index 5a96e5a4cc49a..87d3dc86cee87 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -16,6 +16,7 @@
     pa_version_under14p1 = _palv < Version("14.0.1")
     pa_version_under15p0 = _palv < Version("15.0.0")
     pa_version_under16p0 = _palv < Version("16.0.0")
+    pa_version_under17p0 = _palv < Version("17.0.0")
 except ImportError:
     pa_version_under10p1 = True
     pa_version_under11p0 = True
@@ -25,3 +26,4 @@
     pa_version_under14p1 = True
     pa_version_under15p0 = True
     pa_version_under16p0 = True
+    pa_version_under17p0 = True
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 2e8e358b8e3c9..930df8abea30f 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -14,6 +14,7 @@
     pa_version_under11p0,
     pa_version_under13p0,
     pa_version_under15p0,
+    pa_version_under17p0,
 )
 
 import pandas as pd
@@ -1033,7 +1034,9 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa):
             expected=expected,
         )
 
-    @pytest.mark.xfail(reason="pa.pandas_compat passes 'datetime64' to .astype")
+    @pytest.mark.xfail(
+        pa_version_under17p0, reason="pa.pandas_compat passes 'datetime64' to .astype"
+    )
     def test_columns_dtypes_not_invalid(self, pa):
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})