From 60b10216865752328b71d5e9f60b28fc63df3a99 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Fri, 14 Feb 2020 11:03:31 +0000
Subject: [PATCH 1/5] Don't modify values of the original object directly

---
 doc/source/whatsnew/v1.0.2.rst        |  2 +-
 pandas/core/groupby/generic.py        | 24 ++++++++++++++----------
 pandas/tests/groupby/test_function.py |  2 ++
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index 0216007ea5ba8..e7c0502d3b992 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -19,7 +19,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
 - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
 - Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
--
+- Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying the original values if ``NaN``s were present (:issue:`31950`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 37b6429167646..ed3856bd58ed5 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -591,20 +591,24 @@ def nunique(self, dropna: bool = True) -> Series:
 
         val = self.obj._internal_get_values()
 
-        # GH 27951
-        # temporary fix while we wait for NumPy bug 12629 to be fixed
-        val[isna(val)] = np.datetime64("NaT")
-
-        try:
-            sorter = np.lexsort((val, ids))
-        except TypeError:  # catches object dtypes
-            msg = f"val.dtype must be object, got {val.dtype}"
-            assert val.dtype == object, msg
+        def _object_sorter(val, ids):
             val, _ = algorithms.factorize(val, sort=False)
             sorter = np.lexsort((val, ids))
             _isna = lambda a: a == -1
+            return val, sorter, _isna
+
+        if isna(val).any() and val.dtype == object:
+            # Deal with pandas.NaT
+            val, sorter, _isna = _object_sorter(val, ids)
         else:
-            _isna = isna
+            try:
+                sorter = np.lexsort((val, ids))
+            except TypeError:  # catches object dtypes
+                msg = f"val.dtype must be object, got {val.dtype}"
+                assert val.dtype == object, msg
+                val, sorter, _isna = _object_sorter(val, ids)
+            else:
+                _isna = isna
 
         ids, val = ids[sorter], val[sorter]
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 73e36cb5e6c84..245ed5bf9900b 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -966,6 +966,7 @@ def test_frame_describe_unstacked_format():
 @pytest.mark.parametrize("dropna", [False, True])
 def test_series_groupby_nunique(n, m, sort, dropna):
     def check_nunique(df, keys, as_index=True):
+        original_df = df.copy()
         gr = df.groupby(keys, as_index=as_index, sort=sort)
         left = gr["julie"].nunique(dropna=dropna)
 
@@ -975,6 +976,7 @@ def check_nunique(df, keys, as_index=True):
             right = right.reset_index(drop=True)
 
         tm.assert_series_equal(left, right, check_names=False)
+        tm.assert_frame_equal(df, original_df)
 
     days = date_range("2015-08-23", periods=10)
 

From 86b9bf6e54127d5f502258149779224f45d3a594 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Fri, 14 Feb 2020 11:31:24 +0000
Subject: [PATCH 2/5] fix whatsnew

---
 doc/source/whatsnew/v1.0.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index e7c0502d3b992..77a8e7dc6e39d 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -19,7 +19,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
 - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
 - Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
-- Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying the original values if ``NaN``s were present (:issue:`31950`)
+- Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying the original values if ``NaN`` values were present (:issue:`31950`)
 
 .. ---------------------------------------------------------------------------
 

From ef59ac6ba0ce6d565fb76bcb73351d6106a7cf3f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sat, 22 Feb 2020 11:26:36 +0000
Subject: [PATCH 3/5] no need to special-case NaT

---
 pandas/core/groupby/generic.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ed3856bd58ed5..61d3a3d5a4a77 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -597,8 +597,7 @@ def _object_sorter(val, ids):
             _isna = lambda a: a == -1
             return val, sorter, _isna
 
-        if isna(val).any() and val.dtype == object:
-            # Deal with pandas.NaT
+        if isna(val).any():
             val, sorter, _isna = _object_sorter(val, ids)
         else:
             try:

From 02326fb552d0eec3d7d6fcbbb1f62c8c274991d0 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sat, 22 Feb 2020 11:29:05 +0000
Subject: [PATCH 4/5] remove file

---
 pandas/tests/mytest.py | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 pandas/tests/mytest.py

diff --git a/pandas/tests/mytest.py b/pandas/tests/mytest.py
deleted file mode 100644
index 23bc27f271dd2..0000000000000
--- a/pandas/tests/mytest.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import pandas as pd
-import pytest
-
-@pytest.mark.xfail(strict=True)
-def test():
-    index = pd.period_range(start='2018-01', periods=24, freq='M')
-    periodSerie = pd.Series(range(24),index=index)
-    periodSerie.index.name = 'Month'
-    periodSerie.groupby(periodSerie.index.month).sum()

From 88a7eaed20940b26d5dd48f066f46b1bedbb4c43 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Sat, 22 Feb 2020 17:20:14 +0000
Subject: [PATCH 5/5] simplify according to Jeff's review

---
 pandas/core/groupby/generic.py | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 61d3a3d5a4a77..1bb512aee39e2 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -591,33 +591,18 @@ def nunique(self, dropna: bool = True) -> Series:
 
         val = self.obj._internal_get_values()
 
-        def _object_sorter(val, ids):
-            val, _ = algorithms.factorize(val, sort=False)
-            sorter = np.lexsort((val, ids))
-            _isna = lambda a: a == -1
-            return val, sorter, _isna
-
-        if isna(val).any():
-            val, sorter, _isna = _object_sorter(val, ids)
-        else:
-            try:
-                sorter = np.lexsort((val, ids))
-            except TypeError:  # catches object dtypes
-                msg = f"val.dtype must be object, got {val.dtype}"
-                assert val.dtype == object, msg
-                val, sorter, _isna = _object_sorter(val, ids)
-            else:
-                _isna = isna
-
-        ids, val = ids[sorter], val[sorter]
+        codes, _ = algorithms.factorize(val, sort=False)
+        sorter = np.lexsort((codes, ids))
+        codes = codes[sorter]
+        ids = ids[sorter]
 
         # group boundaries are where group ids change
         # unique observations are where sorted values change
         idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-        inc = np.r_[1, val[1:] != val[:-1]]
+        inc = np.r_[1, codes[1:] != codes[:-1]]
 
         # 1st item of each group is a new unique observation
-        mask = _isna(val)
+        mask = codes == -1
         if dropna:
             inc[idx] = 1
             inc[mask] = 0