From 6c2f679890361681d5e15155c0c0f167d4dd806a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Mar 2022 16:31:10 -0800
Subject: [PATCH 1/2] BUG: GroupBy.cummin altering nullable arrays inplace

---
 pandas/_libs/groupby.pyx              | 48 ++++++++++++++++-----------
 pandas/core/groupby/ops.py            | 18 +++++-----
 pandas/tests/groupby/test_function.py |  4 +++
 3 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d250a69c1985b..d4d84a4d12860 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1459,7 +1459,8 @@ def group_min(
 cdef group_cummin_max(
     iu_64_floating_t[:, ::1] out,
     ndarray[iu_64_floating_t, ndim=2] values,
-    uint8_t[:, ::1] mask,
+    const uint8_t[:, ::1] mask,
+    uint8_t[:, ::1] result_mask,
     const intp_t[::1] labels,
     int ngroups,
     bint is_datetimelike,
@@ -1478,6 +1479,9 @@ cdef group_cummin_max(
     mask : np.ndarray[bool] or None
         If not None, indices represent missing values,
         otherwise the mask will not be used
+    result_mask : ndarray[bool, ndim=2], optional
+        If not None, these specify locations in the output that are NA.
+        Modified in-place.
     labels : np.ndarray[np.intp]
         Labels to group by.
     ngroups : int
@@ -1540,7 +1544,7 @@ cdef group_cummin_max(
 
                 if not skipna and na_possible and seen_na[lab, j]:
                     if uses_mask:
-                        mask[i, j] = 1   # FIXME: shouldn't alter inplace
+                        result_mask[i, j] = 1
                         # Set to 0 ensures that we are deterministic and can
                         #  downcast if appropriate
                         out[i, j] = 0
@@ -1577,19 +1581,21 @@ def group_cummin(
     const intp_t[::1] labels,
     int ngroups,
     bint is_datetimelike,
-    uint8_t[:, ::1] mask=None,
+    const uint8_t[:, ::1] mask=None,
+    uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(
-        out,
-        values,
-        mask,
-        labels,
-        ngroups,
-        is_datetimelike,
-        skipna,
-        compute_max=False
+        out=out,
+        values=values,
+        mask=mask,
+        result_mask=result_mask,
+        labels=labels,
+        ngroups=ngroups,
+        is_datetimelike=is_datetimelike,
+        skipna=skipna,
+        compute_max=False,
     )
 
 
@@ -1601,17 +1607,19 @@ def group_cummax(
     const intp_t[::1] labels,
     int ngroups,
     bint is_datetimelike,
-    uint8_t[:, ::1] mask=None,
+    const uint8_t[:, ::1] mask=None,
+    uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(
-        out,
-        values,
-        mask,
-        labels,
-        ngroups,
-        is_datetimelike,
-        skipna,
-        compute_max=True
+        out=out,
+        values=values,
+        mask=mask,
+        result_mask=result_mask,
+        labels=labels,
+        ngroups=ngroups,
+        is_datetimelike=is_datetimelike,
+        skipna=skipna,
+        compute_max=True,
     )
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index e984279c17a65..7a25a6b9008dd 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -418,9 +418,13 @@ def _masked_ea_wrap_cython_operation(
         """
         orig_values = values
 
-        # Copy to ensure input and result masks don't end up shared
-        mask = values._mask.copy()
-        result_mask = np.zeros(ngroups, dtype=bool)
+        # libgroupby functions are responsible for NOT altering mask
+        mask = values._mask
+        if self.kind != "aggregate":
+            result_mask = mask.copy()
+        else:
+            result_mask = np.zeros(ngroups, dtype=bool)
+
         arr = values._data
 
         res_values = self._cython_op_ndim_compat(
@@ -438,12 +442,7 @@ def _masked_ea_wrap_cython_operation(
         #  dtype; last attempt ran into trouble on 32bit linux build
         res_values = res_values.astype(dtype.type, copy=False)
 
-        if self.kind != "aggregate":
-            out_mask = mask
-        else:
-            out_mask = result_mask
-
-        return orig_values._maybe_mask_result(res_values, out_mask)
+        return orig_values._maybe_mask_result(res_values, result_mask)
 
     @final
     def _cython_op_ndim_compat(
@@ -579,6 +578,7 @@ def _call_cython_op(
                     ngroups=ngroups,
                     is_datetimelike=is_datetimelike,
                     mask=mask,
+                    result_mask=result_mask,
                     **kwargs,
                 )
             else:
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1555e9d02c8ca..ec7f3200b682b 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -843,11 +843,15 @@ def test_cummax(dtypes_for_minmax):
 def test_cummin_max_skipna(method, dtype, groups, expected_data):
     # GH-34047
     df = DataFrame({"a": Series([1, None, 2], dtype=dtype)})
+    orig = df.copy()
     gb = df.groupby(groups)["a"]
 
     result = getattr(gb, method)(skipna=False)
     expected = Series(expected_data, dtype=dtype, name="a")
 
+    # check we didn't accidentally alter df
+    tm.assert_frame_equal(df, orig)
+
     tm.assert_series_equal(result, expected)
 
 

From bfd5aaba67b778e73fa19cbec6f4aafb8ea21d78 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 4 Mar 2022 12:09:23 -0800
Subject: [PATCH 2/2] whatnsew

---
 doc/source/whatsnew/v1.5.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index adee9271c23b0..2db1a636bcf0b 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -429,6 +429,8 @@ Groupby/resample/rolling
 - Bug in :meth:`.ExponentialMovingWindow.mean` with ``axis=1`` and ``engine='numba'`` when the :class:`DataFrame` has more columns than rows (:issue:`46086`)
 - Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`)
 - Bug in :meth:`.DataFrameGroupby.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`)
+- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
+-
 
 Reshaping
 ^^^^^^^^^