diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index 5bfc594602dd8..64e8bdea4672c 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -18,7 +18,8 @@ def ensure_platform_int(object arr):
         if (<ndarray>arr).descr.type_num == PLATFORM_INT:
             return arr
         else:
-            return arr.astype(np.intp)
+            # equiv: arr.astype(np.intp)
+            return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
     else:
         return np.array(arr, dtype=np.intp)

@@ -28,7 +29,8 @@ def ensure_object(object arr):
         if (<ndarray>arr).descr.type_num == NPY_OBJECT:
             return arr
         else:
-            return arr.astype(np.object_)
+            # equiv: arr.astype(object)
+            return cnp.PyArray_Cast(<ndarray>arr, NPY_OBJECT)
     else:
         return np.array(arr, dtype=np.object_)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 40e82798c0753..1bfb66cbf21ac 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -169,7 +169,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cumprod_float64(float64_t[:, :] out,
+def group_cumprod_float64(float64_t[:, ::1] out,
                           const float64_t[:, :] values,
                           const int64_t[:] labels,
                           int ngroups,
@@ -200,7 +200,7 @@ def group_cumprod_float64(float64_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, size
        float64_t val
-        float64_t[:, :] accum
+        float64_t[:, ::1] accum
        int64_t lab

     N, K = (<object>values).shape
@@ -226,7 +226,7 @@ def group_cumprod_float64(float64_t[:, :] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cumsum(numeric[:, :] out,
+def group_cumsum(numeric[:, ::1] out,
                  ndarray[numeric, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
@@ -257,7 +257,7 @@ def group_cumsum(numeric[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, size
        numeric val, y, t
-        numeric[:, :] accum, compensation
+        numeric[:, ::1] accum, compensation
        int64_t lab

     N, K = (<object>values).shape
@@ -295,14 +295,14 @@ def group_cumsum(numeric[:, :] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
+def group_shift_indexer(int64_t[::1] out, const int64_t[:] labels,
                         int ngroups, int periods):
     cdef:
         Py_ssize_t N, i, j, ii
         int offset = 0, sign
         int64_t lab, idxer, idxer_slot
-        int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
-        int64_t[:, :] label_indexer
+        int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64)
+        int64_t[:, ::1] label_indexer

     N, = (<object>labels).shape
@@ -409,10 +409,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_any_all(uint8_t[:] out,
-                  const uint8_t[:] values,
+def group_any_all(uint8_t[::1] out,
+                  const uint8_t[::1] values,
                   const int64_t[:] labels,
-                  const uint8_t[:] mask,
+                  const uint8_t[::1] mask,
                   object val_test,
                   bint skipna):
     """
@@ -478,8 +478,8 @@ ctypedef fused complexfloating_t:

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_add(complexfloating_t[:, :] out,
-               int64_t[:] counts,
+def _group_add(complexfloating_t[:, ::1] out,
+               int64_t[::1] counts,
                ndarray[complexfloating_t, ndim=2] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
@@ -489,8 +489,8 @@ def _group_add(complexfloating_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         complexfloating_t val, count, t, y
-        complexfloating_t[:, :] sumx, compensation
-        int64_t[:, :] nobs
+        complexfloating_t[:, ::1] sumx, compensation
+        int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)

     if len_values != len_labels:
@@ -537,8 +537,8 @@ group_add_complex128 = _group_add['double complex']

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_prod(floating[:, :] out,
-                int64_t[:] counts,
+def _group_prod(floating[:, ::1] out,
+                int64_t[::1] counts,
                 ndarray[floating, ndim=2] values,
                 const int64_t[:] labels,
                 Py_ssize_t min_count=0):
@@ -548,8 +548,8 @@ def _group_prod(floating[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count
-        floating[:, :] prodx
-        int64_t[:, :] nobs
+        floating[:, ::1] prodx
+        int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)

     if len_values != len_labels:
@@ -590,8 +590,8 @@ group_prod_float64 = _group_prod['double']
 @cython.wraparound(False)
 @cython.boundscheck(False)
 @cython.cdivision(True)
-def _group_var(floating[:, :] out,
-               int64_t[:] counts,
+def _group_var(floating[:, ::1] out,
+               int64_t[::1] counts,
                ndarray[floating, ndim=2] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1,
@@ -599,8 +599,8 @@ def _group_var(floating[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, ct, oldmean
-        floating[:, :] mean
-        int64_t[:, :] nobs
+        floating[:, ::1] mean
+        int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)

     assert min_count == -1, "'min_count' only used in add and prod"
@@ -648,16 +648,16 @@ group_var_float64 = _group_var['double']

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_mean(floating[:, :] out,
-                int64_t[:] counts,
+def _group_mean(floating[:, ::1] out,
+                int64_t[::1] counts,
                 ndarray[floating, ndim=2] values,
-                const int64_t[:] labels,
+                const int64_t[::1] labels,
                 Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count, y, t
-        floating[:, :] sumx, compensation
-        int64_t[:, :] nobs
+        floating[:, ::1] sumx, compensation
+        int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)

     assert min_count == -1, "'min_count' only used in add and prod"
@@ -704,8 +704,8 @@ group_mean_float64 = _group_mean['double']

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_ohlc(floating[:, :] out,
-                int64_t[:] counts,
+def _group_ohlc(floating[:, ::1] out,
+                int64_t[::1] counts,
                 ndarray[floating, ndim=2] values,
                 const int64_t[:] labels,
                 Py_ssize_t min_count=-1):
@@ -898,8 +898,8 @@ cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil:
 # use `const rank_t[:, :] values`
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_last(rank_t[:, :] out,
-               int64_t[:] counts,
+def group_last(rank_t[:, ::1] out,
+               int64_t[::1] counts,
                ndarray[rank_t, ndim=2] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
@@ -990,8 +990,8 @@ def group_last(rank_t[:, :] out,
 # use `const rank_t[:, :] values`
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_nth(rank_t[:, :] out,
-              int64_t[:] counts,
+def group_nth(rank_t[:, ::1] out,
+              int64_t[::1] counts,
               ndarray[rank_t, ndim=2] values,
               const int64_t[:] labels,
               int64_t min_count=-1, int64_t rank=1
@@ -1083,7 +1083,7 @@ def group_nth(rank_t[:, :] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_rank(float64_t[:, :] out,
+def group_rank(float64_t[:, ::1] out,
                ndarray[rank_t, ndim=2] values,
                const int64_t[:] labels,
                int ngroups,
@@ -1154,8 +1154,8 @@ ctypedef fused groupby_t:

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_max(groupby_t[:, :] out,
-              int64_t[:] counts,
+def group_max(groupby_t[:, ::1] out,
+              int64_t[::1] counts,
               ndarray[groupby_t, ndim=2] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
@@ -1167,7 +1167,7 @@ def group_max(groupby_t[:, :] out,
         groupby_t val, count, nan_val
         ndarray[groupby_t, ndim=2] maxx
         bint runtime_error = False
-        int64_t[:, :] nobs
+        int64_t[:, ::1] nobs

     # TODO(cython 3.0):
     # Instead of `labels.shape[0]` use `len(labels)`
@@ -1229,8 +1229,8 @@ def group_max(groupby_t[:, :] out,

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min(groupby_t[:, :] out,
-              int64_t[:] counts,
+def group_min(groupby_t[:, ::1] out,
+              int64_t[::1] counts,
               ndarray[groupby_t, ndim=2] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
@@ -1242,7 +1242,7 @@ def group_min(groupby_t[:, :] out,
         groupby_t val, count, nan_val
         ndarray[groupby_t, ndim=2] minx
         bint runtime_error = False
-        int64_t[:, :] nobs
+        int64_t[:, ::1] nobs

     # TODO(cython 3.0):
     # Instead of `labels.shape[0]` use `len(labels)`
@@ -1302,7 +1302,7 @@ def group_min(groupby_t[:, :] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummin(groupby_t[:, :] out,
+def group_cummin(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
@@ -1362,7 +1362,7 @@ def group_cummin(groupby_t[:, :] out,

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummax(groupby_t[:, :] out,
+def group_cummax(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index d6a3d18f711d0..d2f47c9d25496 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -390,7 +390,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
     return method


-def _create_unary_propagating_op(name):
+def _create_unary_propagating_op(name: str):
     def method(self):
         return NA

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index a61e8872a7ce7..008ee4dff4f7b 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -652,7 +652,7 @@ def _cython_operation(
             result = self._aggregate(result, counts, values, codes, func, min_count)
         elif kind == "transform":
            result = maybe_fill(
-                np.empty_like(values, dtype=out_dtype), fill_value=np.nan
+                np.empty(values.shape, dtype=out_dtype), fill_value=np.nan
            )

             # TODO: min_count
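
Note, not part of the patch: switching the declarations from `[:, :]` to `[:, ::1]` marks the buffers as C-contiguous, which lets Cython index them without per-dimension stride lookups but also makes the kernels reject non-C-contiguous input. That is presumably why the transform path in pandas/core/groupby/ops.py now allocates with np.empty(values.shape, ...) rather than np.empty_like(values, ...): empty_like mirrors the source layout, so a Fortran-ordered `values` would produce a Fortran-ordered `out`. A minimal standalone NumPy sketch of that difference follows; the array names are illustrative only.

import numpy as np

# A Fortran-ordered 2-D input array.
values = np.asfortranarray(np.arange(6, dtype=np.float64).reshape(2, 3))

# np.empty_like defaults to order='K' and copies the input's memory layout,
# so the result is Fortran-ordered and a float64_t[:, ::1] cast would fail.
like = np.empty_like(values, dtype=np.float64)
print(like.flags["C_CONTIGUOUS"])   # False

# np.empty with an explicit shape defaults to C order, which satisfies `::1`.
plain = np.empty(values.shape, dtype=np.float64)
print(plain.flags["C_CONTIGUOUS"])  # True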