From 54830d924fcd20bdc35e2f64c28079468619f4e8 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Fri, 26 Jan 2024 23:45:11 -0500
Subject: [PATCH 1/6] ENH: Add skipna to groupby.first and groupby.last

---
 doc/source/whatsnew/v2.2.1.rst          |  2 +-
 pandas/_libs/groupby.pyi                |  2 ++
 pandas/_libs/groupby.pyx                | 41 ++++++++++++++++---------
 pandas/_testing/__init__.py             |  7 +++++
 pandas/conftest.py                      | 32 +++++++++++++++++++
 pandas/core/groupby/groupby.py          | 32 ++++++++++++++-----
 pandas/tests/groupby/test_reductions.py | 32 +++++++++++++++++++
 7 files changed, 124 insertions(+), 24 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index b9b2821ebc468..1100a3b3972e4 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -32,7 +32,7 @@ Bug fixes
 
 Other
 ~~~~~
--
+- Added the argument ``skipna`` to :meth:`DataFrameGroupBy.first`, :meth:`DataFrameGroupBy.last`, :meth:`SeriesGroupBy.first`, and :meth:`SeriesGroupBy.last`; achieving ``skipna=False`` used to be available via :meth:`DataFrameGroupBy.nth`, but the behavior was changed in pandas 2.0.0 (:issue:`57019`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_221.contributors:
diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
index b7130ee35dc57..95ac555303221 100644
--- a/pandas/_libs/groupby.pyi
+++ b/pandas/_libs/groupby.pyi
@@ -136,6 +136,7 @@ def group_last(
     result_mask: npt.NDArray[np.bool_] | None = ...,
     min_count: int = ...,  # Py_ssize_t
     is_datetimelike: bool = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_nth(
     out: np.ndarray,  # rank_t[:, ::1]
@@ -147,6 +148,7 @@ def group_nth(
     min_count: int = ...,  # int64_t
     rank: int = ...,  # int64_t
     is_datetimelike: bool = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_rank(
     out: np.ndarray,  # float64_t[:, ::1]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 45e02c3dd420f..391bb4a3a3fd3 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1428,6 +1428,7 @@ def group_last(
     uint8_t[:, ::1] result_mask=None,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
@@ -1462,14 +1463,19 @@ def group_last(
             for j in range(K):
                 val = values[i, j]
 
-                if uses_mask:
-                    isna_entry = mask[i, j]
-                else:
-                    isna_entry = _treat_as_na(val, is_datetimelike)
+                if skipna:
+                    if uses_mask:
+                        isna_entry = mask[i, j]
+                    else:
+                        isna_entry = _treat_as_na(val, is_datetimelike)
+                    if isna_entry:
+                        continue
 
-                if not isna_entry:
-                    nobs[lab, j] += 1
-                    resx[lab, j] = val
+                nobs[lab, j] += 1
+                resx[lab, j] = val
+
+                if uses_mask and not skipna:
+                    result_mask[lab, j] = mask[i, j]
 
     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx
@@ -1490,6 +1496,7 @@ def group_nth(
     int64_t min_count=-1,
     int64_t rank=1,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
@@ -1524,15 +1531,19 @@ def group_nth(
             for j in range(K):
                 val = values[i, j]
 
-                if uses_mask:
-                    isna_entry = mask[i, j]
-                else:
-                    isna_entry = _treat_as_na(val, is_datetimelike)
+                if skipna:
+                    if uses_mask:
+                        isna_entry = mask[i, j]
+                    else:
+                        isna_entry = _treat_as_na(val, is_datetimelike)
+                    if isna_entry:
+                        continue
 
-                if not isna_entry:
-                    nobs[lab, j] += 1
-                    if nobs[lab, j] == rank:
-                        resx[lab, j] = val
+                nobs[lab, j] += 1
+                if nobs[lab, j] == rank:
+                    resx[lab, j] = val
+                    if uses_mask and not skipna:
+                        result_mask[lab, j] = mask[i, j]
 
     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 3f5fd2e61b0cb..d187d018840fe 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -235,11 +235,18 @@
         + TIMEDELTA_PYARROW_DTYPES
         + BOOL_PYARROW_DTYPES
     )
+    ALL_REAL_PYARROW_DTYPES_STR_REPR = (
+        ALL_INT_PYARROW_DTYPES_STR_REPR + FLOAT_PYARROW_DTYPES_STR_REPR
+    )
 else:
     FLOAT_PYARROW_DTYPES_STR_REPR = []
     ALL_INT_PYARROW_DTYPES_STR_REPR = []
     ALL_PYARROW_DTYPES = []
+    ALL_REAL_PYARROW_DTYPES_STR_REPR = []
 
+ALL_REAL_NULLABLE_DTYPES = (
+    FLOAT_NUMPY_DTYPES + ALL_REAL_EXTENSION_DTYPES + ALL_REAL_PYARROW_DTYPES_STR_REPR
+)
 
 arithmetic_dunder_methods = [
     "__add__",
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 94805313ccfc1..4f62404733979 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1703,6 +1703,38 @@ def any_numpy_dtype(request):
     return request.param
 
 
+@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES)
+def any_real_nullable_dtype(request):
+    """
+    Parameterized fixture for all numpy dtypes.
+
+    * float
+    * 'float32'
+    * 'float64'
+    * 'Float32'
+    * 'Float64'
+    * 'UInt8'
+    * 'UInt16'
+    * 'UInt32'
+    * 'UInt64'
+    * 'Int8'
+    * 'Int16'
+    * 'Int32'
+    * 'Int64'
+    * 'uint8[pyarrow]'
+    * 'uint16[pyarrow]'
+    * 'uint32[pyarrow]'
+    * 'uint64[pyarrow]'
+    * 'int8[pyarrow]'
+    * 'int16[pyarrow]'
+    * 'int32[pyarrow]'
+    * 'int64[pyarrow]'
+    * 'float[pyarrow]'
+    * 'double[pyarrow]'
+    """
+    return request.param
+
+
 @pytest.fixture(params=tm.ALL_NUMERIC_DTYPES)
 def any_numeric_dtype(request):
     """
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b2afaffc267fa..45af5c20ed780 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3364,9 +3364,13 @@ def max(
             )
 
     @final
-    def first(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
+    def first(
+        self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True
+    ) -> NDFrameT:
         """
-        Compute the first non-null entry of each column.
+        Compute the first entry of each column within each group.
+
+        Defaults to skipping NA elements.
 
         Parameters
         ----------
@@ -3374,12 +3378,15 @@ def first(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
             Include only float, int, boolean columns.
         min_count : int, default -1
             The required number of valid values to perform the operation. If fewer
-            than ``min_count`` non-NA values are present the result will be NA.
+            than ``min_count`` valid values are present the result will be NA.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
 
         Returns
         -------
         Series or DataFrame
-            First non-null of values within each group.
+            First values within each group.
 
         See Also
         --------
@@ -3431,12 +3438,17 @@ def first(x: Series):
             min_count=min_count,
             alias="first",
             npfunc=first_compat,
+            skipna=skipna,
         )
 
     @final
-    def last(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
+    def last(
+        self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True
+    ) -> NDFrameT:
         """
-        Compute the last non-null entry of each column.
+        Compute the last entry of each column within each group.
+
+        Defaults to skipping NA elements.
 
         Parameters
         ----------
@@ -3445,12 +3457,15 @@ def last(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
             everything, then use only numeric data.
         min_count : int, default -1
             The required number of valid values to perform the operation. If fewer
-            than ``min_count`` non-NA values are present the result will be NA.
+            than ``min_count`` valid values are present the result will be NA.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
 
         Returns
         -------
         Series or DataFrame
-            Last non-null of values within each group.
+            Last values within each group.
 
         See Also
         --------
@@ -3490,6 +3505,7 @@ def last(x: Series):
             min_count=min_count,
             alias="last",
             npfunc=last_compat,
+            skipna=skipna,
         )
 
     @final
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index d24a2a26bba81..f330101c1d3a8 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -7,6 +7,8 @@
 
 from pandas._libs.tslibs import iNaT
 
+from pandas.core.dtypes.common import is_extension_array_dtype
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -389,6 +391,36 @@ def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("how", ["first", "last"])
+def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
+    if is_extension_array_dtype(any_real_nullable_dtype):
+        na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
+    else:
+        na_value = np.nan
+    df = DataFrame(
+        {
+            "a": [2, 1, 1, 2],
+            "b": [na_value, 3.0, na_value, 4.0],
+            "c": [na_value, 3.0, na_value, 4.0],
+        },
+        dtype=any_real_nullable_dtype,
+    )
+    gb = df.groupby("a", sort=sort)
+    method = getattr(gb, how)
+    result = method(skipna=skipna)
+
+    ilocs = {
+        ("first", True): [3, 1],
+        ("first", False): [0, 1],
+        ("last", True): [3, 1],
+        ("last", False): [3, 2],
+    }[how, skipna]
+    expected = df.iloc[ilocs].set_index("a")
+    if sort:
+        expected = expected.sort_index()
+    tm.assert_frame_equal(result, expected)
+
+
 def test_idxmin_idxmax_axis1():
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]

From b12541bdf59799c49c2e4974a7cae4a05e3509a3 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 27 Jan 2024 12:35:46 -0500
Subject: [PATCH 2/6] resample & tests

---
 doc/source/whatsnew/v2.2.1.rst          |  1 +
 pandas/core/resample.py                 | 10 +++++++--
 pandas/tests/groupby/test_reductions.py |  1 +
 pandas/tests/resample/test_base.py      | 29 +++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index 1100a3b3972e4..2704d9b9a9a6b 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -33,6 +33,7 @@ Bug fixes
 Other
 ~~~~~
 - Added the argument ``skipna`` to :meth:`DataFrameGroupBy.first`, :meth:`DataFrameGroupBy.last`, :meth:`SeriesGroupBy.first`, and :meth:`SeriesGroupBy.last`; achieving ``skipna=False`` used to be available via :meth:`DataFrameGroupBy.nth`, but the behavior was changed in pandas 2.0.0 (:issue:`57019`)
+- Added the argument ``skipna`` to :meth:`Resampler.first` and :meth:`Resampler.last` (:issue:`57019`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_221.contributors:
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 082196abc17c2..4d6507d89ec90 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1329,12 +1329,15 @@ def first(
         self,
         numeric_only: bool = False,
         min_count: int = 0,
+        skipna: bool = True,
         *args,
         **kwargs,
     ):
         maybe_warn_args_and_kwargs(type(self), "first", args, kwargs)
         nv.validate_resampler_func("first", args, kwargs)
-        return self._downsample("first", numeric_only=numeric_only, min_count=min_count)
+        return self._downsample(
+            "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna
+        )
 
     @final
     @doc(GroupBy.last)
@@ -1342,12 +1345,15 @@ def last(
         self,
         numeric_only: bool = False,
         min_count: int = 0,
+        skipna: bool = True,
         *args,
         **kwargs,
     ):
         maybe_warn_args_and_kwargs(type(self), "last", args, kwargs)
         nv.validate_resampler_func("last", args, kwargs)
-        return self._downsample("last", numeric_only=numeric_only, min_count=min_count)
+        return self._downsample(
+            "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna
+        )
 
     @final
     @doc(GroupBy.median)
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index f330101c1d3a8..06c2edf31b334 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -393,6 +393,7 @@ def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
 
 @pytest.mark.parametrize("how", ["first", "last"])
 def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
+    # GH#57019
     if is_extension_array_dtype(any_real_nullable_dtype):
         na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
     else:
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index ab75dd7469b73..9cd51b95d6efd 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -3,6 +3,9 @@
 import numpy as np
 import pytest
 
+from pandas.core.dtypes.common import is_extension_array_dtype
+
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -459,3 +462,29 @@ def test_resample_quantile(index):
         result = ser.resample(freq).quantile(q)
         expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("how", ["first", "last"])
+def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
+    # GH#57019
+    if is_extension_array_dtype(any_real_nullable_dtype):
+        na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
+    else:
+        na_value = np.nan
+    df = DataFrame(
+        {
+            "a": [2, 1, 1, 2],
+            "b": [na_value, 3.0, na_value, 4.0],
+            "c": [na_value, 3.0, na_value, 4.0],
+        },
+        index=date_range("2020-01-01", periods=4, freq="D"),
+        dtype=any_real_nullable_dtype,
+    )
+    rs = df.resample("ME")
+    method = getattr(rs, how)
+    result = method(skipna=skipna)
+
+    gb = df.groupby(df.shape[0] * [pd.to_datetime("2020-01-31")])
+    expected = getattr(gb, how)(skipna=skipna)
+    expected.index.freq = "ME"
+    tm.assert_frame_equal(result, expected)

From 207be1276ac88754bd7eb259e2eccc3307559b05 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 27 Jan 2024 12:39:55 -0500
Subject: [PATCH 3/6] Improve test

---
 pandas/tests/groupby/test_reductions.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 06c2edf31b334..c458b6ab9d96d 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -400,9 +400,9 @@ def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
         na_value = np.nan
     df = DataFrame(
         {
-            "a": [2, 1, 1, 2],
-            "b": [na_value, 3.0, na_value, 4.0],
-            "c": [na_value, 3.0, na_value, 4.0],
+            "a": [2, 1, 1, 2, 3, 3],
+            "b": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
+            "c": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
         },
         dtype=any_real_nullable_dtype,
     )
@@ -411,10 +411,10 @@ def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
     result = method(skipna=skipna)
 
     ilocs = {
-        ("first", True): [3, 1],
-        ("first", False): [0, 1],
-        ("last", True): [3, 1],
-        ("last", False): [3, 2],
+        ("first", True): [3, 1, 4],
+        ("first", False): [0, 1, 4],
+        ("last", True): [3, 1, 5],
+        ("last", False): [3, 2, 5],
     }[how, skipna]
     expected = df.iloc[ilocs].set_index("a")
     if sort:

From b20733720056d71c625c8fb45622b07fd8dd87d4 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 27 Jan 2024 12:47:08 -0500
Subject: [PATCH 4/6] Fixups

---
 pandas/conftest.py             | 2 +-
 pandas/core/groupby/groupby.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 4f62404733979..c5dc48b9ed096 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1706,7 +1706,7 @@ def any_numpy_dtype(request):
 @pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES)
 def any_real_nullable_dtype(request):
     """
-    Parameterized fixture for all numpy dtypes.
+    Parameterized fixture for all real dtypes that can hold NA.
 
     * float
     * 'float32'
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 45af5c20ed780..7227d5c727994 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3383,6 +3383,8 @@ def first(
             Exclude NA/null values. If an entire row/column is NA, the result
             will be NA.
 
+            .. versionadded:: 2.2.1
+
         Returns
         -------
         Series or DataFrame
@@ -3462,6 +3464,8 @@ def last(
             Exclude NA/null values. If an entire row/column is NA, the result
             will be NA.
 
+            .. versionadded:: 2.2.1
+
         Returns
         -------
         Series or DataFrame

From a634d44a967f8b0091505a0c73b9b24c258eec86 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 28 Jan 2024 09:17:57 -0500
Subject: [PATCH 5/6] fixup test

---
 pandas/tests/resample/test_resample_api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index d3e906827b754..12abd1c98784b 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -1040,11 +1040,11 @@ def test_args_kwargs_depr(method, raises):
     if raises:
         with tm.assert_produces_warning(FutureWarning, match=warn_msg):
             with pytest.raises(UnsupportedFunctionCall, match=error_msg):
-                func(*args, 1, 2, 3)
+                func(*args, 1, 2, 3, 4)
     else:
         with tm.assert_produces_warning(FutureWarning, match=warn_msg):
             with pytest.raises(TypeError, match=error_msg_type):
-                func(*args, 1, 2, 3)
+                func(*args, 1, 2, 3, 4)
 
 
 def test_df_axis_param_depr():

From b3bd9bb5806159220eb8c6d9eca2082f22162d5f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 29 Jan 2024 17:08:11 -0500
Subject: [PATCH 6/6] Rework na_value determination

---
 pandas/tests/groupby/test_reductions.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index c458b6ab9d96d..bd188c729846c 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -7,7 +7,8 @@
 
 from pandas._libs.tslibs import iNaT
 
-from pandas.core.dtypes.common import is_extension_array_dtype
+from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.missing import na_value_for_dtype
 
 import pandas as pd
 from pandas import (
@@ -394,10 +395,7 @@ def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
 @pytest.mark.parametrize("how", ["first", "last"])
 def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
     # GH#57019
-    if is_extension_array_dtype(any_real_nullable_dtype):
-        na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
-    else:
-        na_value = np.nan
+    na_value = na_value_for_dtype(pandas_dtype(any_real_nullable_dtype))
     df = DataFrame(
         {
             "a": [2, 1, 1, 2, 3, 3],