From 97d7e11082270d5813fc5c700746bd753c410b09 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 6 Jun 2021 16:56:17 -0400
Subject: [PATCH 1/3] precommit fixup: split group_cummin_max into cummin_max and masked_cummin_max helpers

---
 pandas/_libs/groupby.pyx | 89 ++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 26 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index b72b927b3c2a8..40933c8f30319 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1345,13 +1345,8 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     This method modifies the `out` parameter, rather than returning an object.
     """
     cdef:
-        Py_ssize_t i, j, N, K, size
-        groupby_t val, mval
+        Py_ssize_t N, K
         groupby_t[:, ::1] accum
-        intp_t lab
-        bint val_is_nan, use_mask
-
-    use_mask = mask is not None
 
     N, K = (<object>values).shape
     accum = np.empty((ngroups, K), dtype=values.dtype)
@@ -1362,36 +1357,78 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     else:
         accum[:] = -np.inf if compute_max else np.inf
 
+    if mask is not None:
+        masked_cummin_max(out, values, mask, labels, accum, N, K, compute_max)
+    else:
+        cummin_max(out, values, labels, accum, N, K, is_datetimelike, compute_max)
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef cummin_max(groupby_t[:, ::1] out,
+                ndarray[groupby_t, ndim=2] values,
+                const intp_t[:] labels,
+                groupby_t[:, ::1] accum,
+                Py_ssize_t N,
+                Py_ssize_t K,
+                bint is_datetimelike,
+                bint compute_max):
+    """
+    Compute the cumulative minimum/maximum of columns of `values`, in row groups
+    `labels`.
+    """
+    cdef:
+        Py_ssize_t i, j
+        groupby_t val, mval
+        intp_t lab
+
     with nogil:
         for i in range(N):
             lab = labels[i]
-
             if lab < 0:
                 continue
             for j in range(K):
-                val_is_nan = False
-
-                if use_mask:
-                    if mask[i, j]:
-
-                        # `out` does not need to be set since it
-                        # will be masked anyway
-                        val_is_nan = True
+                val = values[i, j]
+                if not _treat_as_na(val, is_datetimelike):
+                    mval = accum[lab, j]
+                    if compute_max:
+                        if val > mval:
+                            accum[lab, j] = mval = val
                     else:
+                        if val < mval:
+                            accum[lab, j] = mval = val
+                    out[i, j] = mval
+                else:
+                    out[i, j] = val
 
-                        # If using the mask, we can avoid grabbing the
-                        # value unless necessary
-                        val = values[i, j]
 
-                # Otherwise, `out` must be set accordingly if the
-                # value is missing
-                else:
-                    val = values[i, j]
-                    if _treat_as_na(val, is_datetimelike):
-                        val_is_nan = True
-                        out[i, j] = val
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef masked_cummin_max(groupby_t[:, ::1] out,
+                       ndarray[groupby_t, ndim=2] values,
+                       uint8_t[:, ::1] mask,
+                       const intp_t[:] labels,
+                       groupby_t[:, ::1] accum,
+                       Py_ssize_t N,
+                       Py_ssize_t K,
+                       bint compute_max):
+    """
+    Compute the cumulative minimum/maximum of columns of `values`, in row groups
+    `labels` with a masked algorithm.
+    """
+    cdef:
+        Py_ssize_t i, j
+        groupby_t val, mval
+        intp_t lab
 
-                if not val_is_nan:
+    with nogil:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+            for j in range(K):
+                if not mask[i, j]:
+                    val = values[i, j]
                     mval = accum[lab, j]
                     if compute_max:
                         if val > mval:

From 885f8ae956e918c6cfe360bd2cb66aebbd5879cc Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 6 Jun 2021 20:18:47 -0400
Subject: [PATCH 2/3] wip: compute N, K inside cummin_max/masked_cummin_max instead of passing them as arguments

---
 pandas/_libs/groupby.pyx | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 40933c8f30319..ee57ff5c2a205 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1358,9 +1358,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
         accum[:] = -np.inf if compute_max else np.inf
 
     if mask is not None:
-        masked_cummin_max(out, values, mask, labels, accum, N, K, compute_max)
+        masked_cummin_max(out, values, mask, labels, accum, compute_max)
     else:
-        cummin_max(out, values, labels, accum, N, K, is_datetimelike, compute_max)
+        cummin_max(out, values, labels, accum, is_datetimelike, compute_max)
 
 
 @cython.boundscheck(False)
@@ -1369,8 +1369,6 @@ cdef cummin_max(groupby_t[:, ::1] out,
                 ndarray[groupby_t, ndim=2] values,
                 const intp_t[:] labels,
                 groupby_t[:, ::1] accum,
-                Py_ssize_t N,
-                Py_ssize_t K,
                 bint is_datetimelike,
                 bint compute_max):
     """
@@ -1378,10 +1376,11 @@ cdef cummin_max(groupby_t[:, ::1] out,
     `labels`.
     """
     cdef:
-        Py_ssize_t i, j
+        Py_ssize_t i, j, N, K
         groupby_t val, mval
         intp_t lab
 
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]
@@ -1409,18 +1408,17 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
                        uint8_t[:, ::1] mask,
                        const intp_t[:] labels,
                        groupby_t[:, ::1] accum,
-                       Py_ssize_t N,
-                       Py_ssize_t K,
                        bint compute_max):
     """
     Compute the cumulative minimum/maximum of columns of `values`, in row groups
     `labels` with a masked algorithm.
     """
     cdef:
-        Py_ssize_t i, j
+        Py_ssize_t i, j, N, K
         groupby_t val, mval
         intp_t lab
 
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]

From 51f8f9c8ccd0def2c0a4f93dc398a89ab94e986e Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 7 Jun 2021 11:00:19 -0400
Subject: [PATCH 3/3] Remove unused N, K locals from group_cummin_max

---
 pandas/_libs/groupby.pyx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index ee57ff5c2a205..0e0598c3264e8 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1345,11 +1345,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     This method modifies the `out` parameter, rather than returning an object.
     """
     cdef:
-        Py_ssize_t N, K
         groupby_t[:, ::1] accum
 
-    N, K = (<object>values).shape
-    accum = np.empty((ngroups, K), dtype=values.dtype)
+    accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
     if groupby_t is int64_t:
         accum[:] = -_int64_max if compute_max else _int64_max
     elif groupby_t is uint64_t: