pandas-dev · mroeschke · Mar 29, 2023 · Mar 27, 2023 · Mar 27, 2023 · Mar 28, 2023
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -51,10 +51,12 @@ from pandas._libs.khash cimport (
     kh_resize_int64,
     khiter_t,
 )
+from pandas._libs.missing cimport (
+    checknull,
+    isnaobj,
+)
 from pandas._libs.util cimport get_nat
 
-import pandas._libs.missing as missing
-
 cdef:
     float64_t FP_ERR = 1e-13
     float64_t NaN = <float64_t>np.NaN
@@ -95,10 +97,10 @@ class Infinity:
 
     def __gt__(self, other):
         return (not isinstance(other, Infinity) and
-                not missing.checknull(other))
+                not checknull(other))
 
     def __ge__(self, other):
-        return not missing.checknull(other)
+        return not checknull(other)
 
 
 class NegInfinity:
@@ -107,10 +109,10 @@ class NegInfinity:
     """
     def __lt__(self, other):
         return  (not isinstance(other, NegInfinity) and
-                 not missing.checknull(other))
+                 not checknull(other))
 
     def __le__(self, other):
-        return not missing.checknull(other)
+        return not checknull(other)
 
     def __eq__(self, other):
         return isinstance(other, NegInfinity)
@@ -988,7 +990,7 @@ def rank_1d(
     if mask is not None:
         pass
     elif numeric_object_t is object:
-        mask = missing.isnaobj(masked_vals)
+        mask = isnaobj(masked_vals)
     elif numeric_object_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
     elif numeric_object_t is float64_t or numeric_object_t is float32_t:
@@ -1366,7 +1368,7 @@ def rank_2d(
         nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
 
         if numeric_object_t is object:
-            mask = missing.isnaobj(values).view(np.uint8)
+            mask = isnaobj(values).view(np.uint8)
         elif numeric_object_t is float64_t or numeric_object_t is float32_t:
             mask = np.isnan(values).view(np.uint8)
         else:

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -49,7 +49,6 @@ from pandas._libs.missing cimport checknull
 
 
 cdef int64_t NPY_NAT = util.get_nat()
-_int64_max = np.iinfo(np.int64).max
 
 cdef float64_t NaN = <float64_t>np.NaN
 
@@ -256,9 +255,9 @@ def group_cumprod(
         Always false, `values` is never datetime-like.
     skipna : bool
         If true, ignore nans in `values`.
-    mask: np.ndarray[uint8], optional
+    mask : np.ndarray[uint8], optional
         Mask of values
-    result_mask: np.ndarray[int8], optional
+    result_mask : np.ndarray[int8], optional
         Mask of out array
 
     Notes
@@ -345,9 +344,9 @@ def group_cumsum(
         True if `values` contains datetime-like entries.
     skipna : bool
         If true, ignore nans in `values`.
-    mask: np.ndarray[uint8], optional
+    mask : np.ndarray[uint8], optional
         Mask of values
-    result_mask: np.ndarray[int8], optional
+    result_mask : np.ndarray[int8], optional
         Mask of out array
 
     Notes
@@ -615,7 +614,7 @@ def group_any_all(
         # value encountered is True
         flag_val = 1
     else:
-        raise ValueError("'bool_func' must be either 'any' or 'all'!")
+        raise ValueError("'val_test' must be either 'any' or 'all'!")
 
     out[:] = 1 - flag_val
 
@@ -1036,7 +1035,7 @@ def group_ohlc(
         raise NotImplementedError("Argument 'values' must have only one dimension")
 
     if int64float_t is float32_t or int64float_t is float64_t:
-        out[:] = np.nan
+        out[:] = NAN
     else:
         out[:] = 0
 

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -2428,7 +2428,7 @@ def maybe_convert_objects(ndarray[object] objects,
         Seen seen = Seen()
         object val
         _TSObject tsobj
-        float64_t fnan = np.nan
+        float64_t fnan = NaN
 
     if dtype_if_all_nat is not None:
         # in practice we don't expect to ever pass dtype_if_all_nat

diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
@@ -9,6 +9,7 @@ from numpy cimport (
 import numpy as np
 
 cimport numpy as cnp
+from numpy.math cimport NAN
 
 cnp.import_array()
 
@@ -129,7 +130,7 @@ def explode(ndarray[object] values):
                     count += 1
             else:
                 # empty list-like, use a nan marker
-                result[count] = np.nan
+                result[count] = NAN
                 count += 1
         else:
             # replace with the existing scalar

diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
@@ -1,4 +1,5 @@
 cimport cython
+
 import numpy as np
 
 cimport numpy as cnp
@@ -10,16 +11,14 @@ from numpy cimport (
     ndarray,
     uint8_t,
 )
+from numpy.math cimport (
+    INFINITY as INF,
+    NAN as NaN,
+)
 
 cnp.import_array()
 
 
-# -----------------------------------------------------------------------------
-# Preamble stuff
-
-cdef float64_t NaN = <float64_t>np.NaN
-cdef float64_t INF = <float64_t>np.inf
-
 # -----------------------------------------------------------------------------
 
 

diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -150,12 +150,13 @@ def get_date_name_field(
     name based on requested field (e.g. day_name)
     """
     cdef:
-        Py_ssize_t i, count = dtindex.shape[0]
+        Py_ssize_t i
+        cnp.npy_intp count = dtindex.shape[0]
         ndarray[object] out, names
         npy_datetimestruct dts
         int dow
 
-    out = np.empty(count, dtype=object)
+    out = cnp.PyArray_EMPTY(1, &count, cnp.NPY_OBJECT, 0)
 
     if field == "day_name":
         if locale is None:

diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx
@@ -270,11 +270,12 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
 
 cdef int64_t[::1] unbox_utcoffsets(object transinfo):
     cdef:
-        Py_ssize_t i, sz
+        Py_ssize_t i
+        cnp.npy_intp sz
         int64_t[::1] arr
 
     sz = len(transinfo)
-    arr = np.empty(sz, dtype="i8")
+    arr = cnp.PyArray_EMPTY(1, &sz, cnp.NPY_INT64, 0)
 
     for i in range(sz):
         arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000

diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx
@@ -1,14 +1,11 @@
 cimport cython
+cimport numpy as cnp
 from cpython.datetime cimport (
     date,
     datetime,
     time,
     tzinfo,
 )
-
-import numpy as np
-
-cimport numpy as cnp
 from numpy cimport (
     int64_t,
     ndarray,
@@ -101,7 +98,7 @@ def ints_to_pydatetime(
     tzinfo tz=None,
     str box="datetime",
     NPY_DATETIMEUNIT reso=NPY_FR_ns,
-) -> np.ndarray:
+) -> ndarray:
     # stamps is int64, arbitrary ndim
     """
     Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1078,7 +1078,9 @@ def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
         return other + self
 
     @final
-    def _sub_datetimelike_scalar(self, other: datetime | np.datetime64):
+    def _sub_datetimelike_scalar(
+        self, other: datetime | np.datetime64
+    ) -> TimedeltaArray:
         if self.dtype.kind != "M":
             raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
 
@@ -1095,7 +1097,7 @@ def _sub_datetimelike_scalar(self, other: datetime | np.datetime64):
         return self._sub_datetimelike(ts)
 
     @final
-    def _sub_datetime_arraylike(self, other: DatetimeArray):
+    def _sub_datetime_arraylike(self, other: DatetimeArray) -> TimedeltaArray:
         if self.dtype.kind != "M":
             raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
 
@@ -1296,7 +1298,7 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
         res_values = op(self.astype("O"), np.asarray(other))
         return res_values
 
-    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
+    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
         if name not in {"cummin", "cummax"}:
             raise TypeError(f"Accumulation {name} not supported for {type(self)}")
 
@@ -2015,7 +2017,7 @@ def round(
         freq,
         ambiguous: TimeAmbiguous = "raise",
         nonexistent: TimeNonexistent = "raise",
-    ):
+    ) -> Self:
         return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)
 
     @Appender((_round_doc + _floor_example).format(op="floor"))
@@ -2024,7 +2026,7 @@ def floor(
         freq,
         ambiguous: TimeAmbiguous = "raise",
         nonexistent: TimeNonexistent = "raise",
-    ):
+    ) -> Self:
         return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
 
     @Appender((_round_doc + _ceil_example).format(op="ceil"))
@@ -2033,7 +2035,7 @@ def ceil(
         freq,
         ambiguous: TimeAmbiguous = "raise",
         nonexistent: TimeNonexistent = "raise",
-    ):
+    ) -> Self:
         return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
 
     # --------------------------------------------------------------
@@ -2054,7 +2056,7 @@ def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
     def _maybe_clear_freq(self) -> None:
         self._freq = None
 
-    def _with_freq(self, freq):
+    def _with_freq(self, freq) -> Self:
         """
         Helper to get a view on the same data, with a new freq.
 

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -353,7 +353,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
 
         else:
             if hasattr(scalars, "type"):
-                # pyarrow array
+                # pyarrow array; we cannot rely on the "to_numpy" check in
+                #  ensure_string_array because calling scalars.to_numpy would set
+                #  zero_copy_only to True, which caused problems (see GH#52076)
                 scalars = np.array(scalars)
             # convert non-na-likes to str, and nan-likes to StringDtype().na_value
             result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -112,7 +112,6 @@
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
 _int32_max = np.iinfo(np.int32).max
-_int64_max = np.iinfo(np.int64).max
 
 _dtype_obj = np.dtype(object)