From 3a5fc90e8e2e7fa590d081f076b4df1676d6dd06 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sat, 18 May 2024 12:30:35 +0800
Subject: [PATCH 01/12] add *args for raw numba apply

---
 pandas/core/_numba/executor.py         |  8 ++++----
 pandas/core/apply.py                   |  7 +++----
 pandas/tests/apply/test_frame_apply.py | 16 ++++++++++++++++
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py
index 0a26acb7df60a..9935ed5df5afa 100644
--- a/pandas/core/_numba/executor.py
+++ b/pandas/core/_numba/executor.py
@@ -24,7 +24,7 @@ def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
     nb_compat_func = numba.extending.register_jitable(func)
 
     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-    def nb_looper(values, axis):
+    def nb_looper(values, axis, *args):
         # Operate on the first row/col in order to get
         # the output shape
         if axis == 0:
@@ -33,7 +33,7 @@ def nb_looper(values, axis):
         else:
             first_elem = values[0]
             dim0 = values.shape[0]
-        res0 = nb_compat_func(first_elem)
+        res0 = nb_compat_func(first_elem, *args)
         # Use np.asarray to get shape for
         # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
         buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
@@ -44,11 +44,11 @@ def nb_looper(values, axis):
         if axis == 1:
             buff[0] = res0
             for i in numba.prange(1, values.shape[0]):
-                buff[i] = nb_compat_func(values[i])
+                buff[i] = nb_compat_func(values[i], *args)
         else:
             buff[:, 0] = res0
             for j in numba.prange(1, values.shape[1]):
-                buff[:, j] = nb_compat_func(values[:, j])
+                buff[:, j] = nb_compat_func(values[:, j], *args)
         return buff
 
     return nb_looper
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 32e8aea7ea8ab..24da3850152b0 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -51,6 +51,7 @@
 from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.util.numba_ import get_jit_arguments
 
 if TYPE_CHECKING:
     from collections.abc import (
@@ -972,17 +973,15 @@ def wrapper(*args, **kwargs):
             return wrapper
 
         if engine == "numba":
-            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
-
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
                 self.func,  # type: ignore[arg-type]
-                **engine_kwargs,
+                **get_jit_arguments(engine_kwargs, self.kwargs),
             )
-            result = nb_looper(self.values, self.axis)
+            result = nb_looper(self.values, self.axis, *self.args)
             # If we made the result 2-D, squeeze it back to 1-D
             result = np.squeeze(result)
         else:
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index cbc68265a1cc1..27f0779ebef5f 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1718,3 +1718,19 @@ def test_agg_dist_like_and_nonunique_columns():
     result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)
+
+
+def test_numba_raw_apply_with_args():
+    # GH:58712
+    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine="numba", raw=True)
+    # note:
+    # result is always float dtype, see core._numba.executor.py:generate_apply_looper
+    expected = df + 3.0
+    tm.assert_frame_equal(result, expected)
+
+    with pytest.raises(
+        pd.errors.NumbaUtilError,
+        match="numba does not support kwargs with nopython=True",
+    ):
+        df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine="numba", raw=True)

From 3165efe07f265af5a5f38b0f6e7808167e7cffd9 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sat, 18 May 2024 13:52:55 +0800
Subject: [PATCH 02/12] add whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 731406394ed46..7b39104bb2c66 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -498,6 +498,7 @@ Other
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
+- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` and ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)

From de8957467eb3ec077a320f294f36fff9d2359a34 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sat, 18 May 2024 14:16:16 +0800
Subject: [PATCH 03/12] fix test_case

---
 pandas/tests/apply/test_frame_apply.py | 27 +++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 27f0779ebef5f..ca921d98088d2 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1720,17 +1720,18 @@ def test_agg_dist_like_and_nonunique_columns():
     tm.assert_series_equal(result, expected)
 
 
-def test_numba_raw_apply_with_args():
-    # GH:58712
-    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
-    result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine="numba", raw=True)
-    # note:
-    # result is always float dtype, see core._numba.executor.py:generate_apply_looper
-    expected = df + 3.0
-    tm.assert_frame_equal(result, expected)
+def test_numba_raw_apply_with_args(engine):
+    if engine == "numba":
+        # GH:58712
+        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True)
+        # note:
+        # result is always float dtype, see core._numba.executor.py:generate_apply_looper
+        expected = df + 3.0
+        tm.assert_frame_equal(result, expected)
 
-    with pytest.raises(
-        pd.errors.NumbaUtilError,
-        match="numba does not support kwargs with nopython=True",
-    ):
-        df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine="numba", raw=True)
+        with pytest.raises(
+            pd.errors.NumbaUtilError,
+            match="numba does not support kwargs with nopython=True",
+        ):
+            df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=True)

From 3f13b303114b7a1fe2671b58327e1cf0a29fe2a3 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sat, 18 May 2024 14:19:50 +0800
Subject: [PATCH 04/12] fix pre-commit

---
 pandas/tests/apply/test_frame_apply.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index ca921d98088d2..de2d15c3760de 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1724,9 +1724,11 @@ def test_numba_raw_apply_with_args(engine):
     if engine == "numba":
         # GH:58712
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
-        result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True)
-        # note:
-        # result is always float dtype, see core._numba.executor.py:generate_apply_looper
+        result = df.apply(
+            lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True
+        )
+        # note: result is always float dtype,
+        # see core._numba.executor.py:generate_apply_looper
         expected = df + 3.0
         tm.assert_frame_equal(result, expected)
 

From c0268459694c02662f1e263625e0197212b486f5 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sat, 18 May 2024 22:11:57 +0800
Subject: [PATCH 05/12] fix test case

---
 pandas/tests/apply/test_frame_apply.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index de2d15c3760de..3ff156cac3e35 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -64,7 +64,7 @@ def test_apply(float_frame, engine, request):
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize("raw", [True, False])
 def test_apply_args(float_frame, axis, raw, engine, request):
-    if engine == "numba":
+    if engine == "numba" and raw is False:
         mark = pytest.mark.xfail(reason="numba engine doesn't support args")
         request.node.add_marker(mark)
     result = float_frame.apply(

From 96581a316ad9e3d6acb62e724af9cb2c273d9fb2 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sun, 19 May 2024 11:53:23 +0800
Subject: [PATCH 06/12] add *args for raw=False as well; merge tests together

---
 doc/source/whatsnew/v3.0.0.rst         |  2 +-
 pandas/core/apply.py                   | 18 ++++++++------
 pandas/tests/apply/test_frame_apply.py | 34 +++++++++-----------------
 3 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7b39104bb2c66..7cb6af877be42 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -498,7 +498,7 @@ Other
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
-- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` and ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
+- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 24da3850152b0..bfc268047b8da 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -1122,21 +1122,22 @@ def generate_numba_apply_func(
         # Currently the parallel argument doesn't get passed through here
         # (it's disabled) since the dicts in numba aren't thread-safe.
         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names, df_index):
+        def numba_func(values, col_names, df_index, *args):
             results = {}
             for j in range(values.shape[1]):
                 # Create the series
                 ser = Series(
                     values[:, j], index=df_index, name=maybe_cast_str(col_names[j])
                 )
-                results[j] = jitted_udf(ser)
+                results[j] = jitted_udf(ser, *args)
             return results
 
         return numba_func
 
     def apply_with_numba(self) -> dict[int, Any]:
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            cast(Callable, self.func),
+            **get_jit_arguments(self.engine_kwargs, self.kwargs),
         )
         from pandas.core._numba.extensions import set_numba_data
 
@@ -1151,7 +1152,7 @@ def apply_with_numba(self) -> dict[int, Any]:
         # Convert from numba dict to regular dict
         # Our isinstance checks in the df constructor don't pass for numbas typed dict
         with set_numba_data(index) as index, set_numba_data(columns) as columns:
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *self.args))
         return res
 
     @property
@@ -1259,7 +1260,7 @@ def generate_numba_apply_func(
         jitted_udf = numba.extending.register_jitable(func)
 
         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names_index, index):
+        def numba_func(values, col_names_index, index, *args):
             results = {}
             # Currently the parallel argument doesn't get passed through here
             # (it's disabled) since the dicts in numba aren't thread-safe.
@@ -1271,7 +1272,7 @@ def numba_func(values, col_names_index, index):
                     index=col_names_index,
                     name=maybe_cast_str(index[i]),
                 )
-                results[i] = jitted_udf(ser)
+                results[i] = jitted_udf(ser, *args)
 
             return results
 
@@ -1279,7 +1280,8 @@ def numba_func(values, col_names_index, index):
 
     def apply_with_numba(self) -> dict[int, Any]:
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            cast(Callable, self.func),
+            **get_jit_arguments(self.engine_kwargs, self.kwargs),
         )
 
         from pandas.core._numba.extensions import set_numba_data
@@ -1290,7 +1292,7 @@ def apply_with_numba(self) -> dict[int, Any]:
             set_numba_data(self.obj.index) as index,
             set_numba_data(self.columns) as columns,
         ):
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *self.args))
 
         return res
 
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3ff156cac3e35..32a1ed596380a 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -63,16 +63,23 @@ def test_apply(float_frame, engine, request):
 
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize("raw", [True, False])
-def test_apply_args(float_frame, axis, raw, engine, request):
-    if engine == "numba" and raw is False:
-        mark = pytest.mark.xfail(reason="numba engine doesn't support args")
-        request.node.add_marker(mark)
+def test_apply_args(float_frame, axis, raw, engine):
+    # GH:58712
     result = float_frame.apply(
         lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine
     )
     expected = float_frame + 1
     tm.assert_frame_equal(result, expected)
 
+    if engine == "numba":
+        with pytest.raises(
+            pd.errors.NumbaUtilError,
+            match="numba does not support kwargs with nopython=True",
+        ):
+            float_frame.apply(
+                lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+            )
+
 
 def test_apply_categorical_func():
     # GH 9573
@@ -1718,22 +1725,3 @@ def test_agg_dist_like_and_nonunique_columns():
     result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)
-
-
-def test_numba_raw_apply_with_args(engine):
-    if engine == "numba":
-        # GH:58712
-        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
-        result = df.apply(
-            lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True
-        )
-        # note: result is always float dtype,
-        # see core._numba.executor.py:generate_apply_looper
-        expected = df + 3.0
-        tm.assert_frame_equal(result, expected)
-
-        with pytest.raises(
-            pd.errors.NumbaUtilError,
-            match="numba does not support kwargs with nopython=True",
-        ):
-            df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=True)

From 2aae933f96a9e11c10f8a98f71f49d9b565fd9be Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Tue, 21 May 2024 09:21:25 +0800
Subject: [PATCH 07/12] add prepare_function_arguments

---
 pandas/core/_numba/executor.py         |  4 ++-
 pandas/core/apply.py                   | 20 ++++++++----
 pandas/core/util/numba_.py             | 45 ++++++++++++++++++++++++++
 pandas/tests/apply/test_frame_apply.py | 19 +++++++++--
 4 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py
index 9935ed5df5afa..82fd4e34ac67b 100644
--- a/pandas/core/_numba/executor.py
+++ b/pandas/core/_numba/executor.py
@@ -14,6 +14,8 @@
 
 from pandas.compat._optional import import_optional_dependency
 
+from pandas.core.util.numba_ import jit_user_function
+
 
 @functools.cache
 def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
@@ -21,7 +23,7 @@ def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
         import numba
     else:
         numba = import_optional_dependency("numba")
-    nb_compat_func = numba.extending.register_jitable(func)
+    nb_compat_func = jit_user_function(func)
 
     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
     def nb_looper(values, axis, *args):
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index bfc268047b8da..7137fc8d71c71 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -51,7 +51,10 @@
 from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
-from pandas.core.util.numba_ import get_jit_arguments
+from pandas.core.util.numba_ import (
+    get_jit_arguments,
+    prepare_function_arguments,
+)
 
 if TYPE_CHECKING:
     from collections.abc import (
@@ -973,15 +976,16 @@ def wrapper(*args, **kwargs):
             return wrapper
 
         if engine == "numba":
+            args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
                 self.func,  # type: ignore[arg-type]
-                **get_jit_arguments(engine_kwargs, self.kwargs),
+                **get_jit_arguments(engine_kwargs, kwargs),
             )
-            result = nb_looper(self.values, self.axis, *self.args)
+            result = nb_looper(self.values, self.axis, *args)
             # If we made the result 2-D, squeeze it back to 1-D
             result = np.squeeze(result)
         else:
@@ -1135,9 +1139,10 @@ def numba_func(values, col_names, df_index, *args):
         return numba_func
 
     def apply_with_numba(self) -> dict[int, Any]:
+        args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
             cast(Callable, self.func),
-            **get_jit_arguments(self.engine_kwargs, self.kwargs),
+            **get_jit_arguments(self.engine_kwargs, kwargs),
         )
         from pandas.core._numba.extensions import set_numba_data
 
@@ -1152,7 +1157,7 @@ def apply_with_numba(self) -> dict[int, Any]:
         # Convert from numba dict to regular dict
         # Our isinstance checks in the df constructor don't pass for numbas typed dict
         with set_numba_data(index) as index, set_numba_data(columns) as columns:
-            res = dict(nb_func(self.values, columns, index, *self.args))
+            res = dict(nb_func(self.values, columns, index, *args))
         return res
 
     @property
@@ -1279,9 +1284,10 @@ def numba_func(values, col_names_index, index, *args):
         return numba_func
 
     def apply_with_numba(self) -> dict[int, Any]:
+        args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
             cast(Callable, self.func),
-            **get_jit_arguments(self.engine_kwargs, self.kwargs),
+            **get_jit_arguments(self.engine_kwargs, kwargs),
         )
 
         from pandas.core._numba.extensions import set_numba_data
@@ -1292,7 +1298,7 @@ def apply_with_numba(self) -> dict[int, Any]:
             set_numba_data(self.obj.index) as index,
             set_numba_data(self.columns) as columns,
         ):
-            res = dict(nb_func(self.values, columns, index, *self.args))
+            res = dict(nb_func(self.values, columns, index, *args))
 
         return res
 
diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py
index a6079785e7475..da02c4b5ccf34 100644
--- a/pandas/core/util/numba_.py
+++ b/pandas/core/util/numba_.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import inspect
 import types
 from typing import (
     TYPE_CHECKING,
@@ -97,3 +98,47 @@ def jit_user_function(func: Callable) -> Callable:
         numba_func = numba.extending.register_jitable(func)
 
     return numba_func
+
+
+_sentinel = object()
+
+
+def prepare_function_arguments(
+    func: Callable, args: tuple, kwargs: dict
+) -> tuple[tuple, dict]:
+    """
+    Prepare arguments for a jitted function. As numba functions do not support
+    kwargs, keyword arguments are moved into ``args`` where the signature allows.
+
+    Parameters
+    ----------
+    func : function
+        user defined function; its signature is used to bind the arguments
+    args : tuple
+        user input positional arguments (without the leading value argument)
+    kwargs : dict
+        user input keyword arguments
+
+    Returns
+    -------
+    tuple[tuple, dict]
+        positional arguments after binding, and the kwargs that remain keyword-only
+
+    """
+    if not kwargs:
+        return args, kwargs
+    # _sentinel is bound in place of the value the udf receives as first argument.
+    # the udf should have this pattern: def udf(value, *args, **kwargs):...
+    signature = inspect.signature(func)
+    arguments = signature.bind(_sentinel, *args, **kwargs)
+    arguments.apply_defaults()
+    # Ref: https://peps.python.org/pep-0362/
+    # Arguments which could be passed as part of either *args or **kwargs
+    # will be included only in the BoundArguments.args attribute.
+    args = arguments.args
+    kwargs = arguments.kwargs
+    # Strip the placeholder again so only the user-supplied arguments are returned.
+    assert args[0] is _sentinel
+    args = args[1:]
+
+    return args, kwargs
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 32a1ed596380a..003c3594a9441 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -64,20 +64,35 @@ def test_apply(float_frame, engine, request):
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize("raw", [True, False])
 def test_apply_args(float_frame, axis, raw, engine):
-    # GH:58712
     result = float_frame.apply(
         lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine
     )
     expected = float_frame + 1
     tm.assert_frame_equal(result, expected)
 
+    # GH:58712
+    result = float_frame.apply(
+        lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+    )
+    expected = float_frame + 3
+    tm.assert_frame_equal(result, expected)
+
     if engine == "numba":
+        # keyword-only arguments are not supported in numba
+        with pytest.raises(
+            pd.errors.NumbaUtilError,
+            match="numba does not support kwargs with nopython=True",
+        ):
+            float_frame.apply(
+                lambda x, a, *, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+            )
+
         with pytest.raises(
             pd.errors.NumbaUtilError,
             match="numba does not support kwargs with nopython=True",
         ):
             float_frame.apply(
-                lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+                lambda *x, b: x[0] + x[1] + b, args=(1,), b=2, engine=engine, raw=raw
             )
 
 

From 1a6f1aeeba772e4fe6382df2306e4b5d93bf21a3 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Tue, 21 May 2024 10:45:39 +0800
Subject: [PATCH 08/12] fix mypy

---
 pandas/core/apply.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 7137fc8d71c71..4500d4e8a50be 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -74,7 +74,6 @@
     from pandas.core.resample import Resampler
     from pandas.core.window.rolling import BaseWindow
 
-
 ResType = dict[int, Any]
 
 
@@ -976,7 +975,11 @@ def wrapper(*args, **kwargs):
             return wrapper
 
         if engine == "numba":
-            args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
+            args, kwargs = prepare_function_arguments(
+                self.func,  # type: ignore[arg-type]
+                self.args,
+                self.kwargs,
+            )
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
@@ -1139,10 +1142,10 @@ def numba_func(values, col_names, df_index, *args):
         return numba_func
 
     def apply_with_numba(self) -> dict[int, Any]:
-        args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func),
-            **get_jit_arguments(self.engine_kwargs, kwargs),
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
         )
         from pandas.core._numba.extensions import set_numba_data
 
@@ -1284,10 +1287,10 @@ def numba_func(values, col_names_index, index, *args):
         return numba_func
 
     def apply_with_numba(self) -> dict[int, Any]:
-        args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs)
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func),
-            **get_jit_arguments(self.engine_kwargs, kwargs),
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
         )
 
         from pandas.core._numba.extensions import set_numba_data

From 8925b3a307d0d5667a10b5b10b4f205afca11896 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sun, 26 May 2024 10:31:21 +0800
Subject: [PATCH 09/12] update get_jit_arguments

---
 pandas/core/util/numba_.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py
index da02c4b5ccf34..d93984d210cb4 100644
--- a/pandas/core/util/numba_.py
+++ b/pandas/core/util/numba_.py
@@ -55,10 +55,15 @@ def get_jit_arguments(
         engine_kwargs = {}
 
     nopython = engine_kwargs.get("nopython", True)
-    if kwargs and nopython:
+    if kwargs:
+        # Note: in case numba supports keyword-only arguments in
+        # a future version, we should remove this check. But this
+        # seems unlikely to happen soon.
+
         raise NumbaUtilError(
-            "numba does not support kwargs with nopython=True: "
-            "https://github.com/numba/numba/issues/2916"
+            "numba does not support keyword-only arguments: "
+            "https://github.com/numba/numba/issues/2916, "
+            "https://github.com/numba/numba/issues/6846"
         )
     nogil = engine_kwargs.get("nogil", False)
     parallel = engine_kwargs.get("parallel", False)

From 085ae73f68a4c5e4597f542a70bd132af39103f1 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sun, 26 May 2024 10:31:44 +0800
Subject: [PATCH 10/12] add nopython test in `test_apply_args`

---
 pandas/tests/apply/test_frame_apply.py | 36 +++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 003c3594a9441..939997f44c1a9 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -63,16 +63,28 @@ def test_apply(float_frame, engine, request):
 
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize("raw", [True, False])
-def test_apply_args(float_frame, axis, raw, engine):
+@pytest.mark.parametrize("nopython", [True, False])
+def test_apply_args(float_frame, axis, raw, engine, nopython):
+    engine_kwargs = {"nopython": nopython}
     result = float_frame.apply(
-        lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine
+        lambda x, y: x + y,
+        axis,
+        args=(1,),
+        raw=raw,
+        engine=engine,
+        engine_kwargs=engine_kwargs,
     )
     expected = float_frame + 1
     tm.assert_frame_equal(result, expected)
 
     # GH:58712
     result = float_frame.apply(
-        lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+        lambda x, a, b: x + a + b,
+        args=(1,),
+        b=2,
+        raw=raw,
+        engine=engine,
+        engine_kwargs=engine_kwargs,
     )
     expected = float_frame + 3
     tm.assert_frame_equal(result, expected)
@@ -81,18 +93,28 @@ def test_apply_args(float_frame, axis, raw, engine):
         # keyword-only arguments are not supported in numba
         with pytest.raises(
             pd.errors.NumbaUtilError,
-            match="numba does not support kwargs with nopython=True",
+            match="numba does not support keyword-only arguments",
         ):
             float_frame.apply(
-                lambda x, a, *, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw
+                lambda x, a, *, b: x + a + b,
+                args=(1,),
+                b=2,
+                raw=raw,
+                engine=engine,
+                engine_kwargs=engine_kwargs,
             )
 
         with pytest.raises(
             pd.errors.NumbaUtilError,
-            match="numba does not support kwargs with nopython=True",
+            match="numba does not support keyword-only arguments",
         ):
             float_frame.apply(
-                lambda *x, b: x[0] + x[1] + b, args=(1,), b=2, engine=engine, raw=raw
+                lambda *x, b: x[0] + x[1] + b,
+                args=(1,),
+                b=2,
+                raw=raw,
+                engine=engine,
+                engine_kwargs=engine_kwargs,
             )
 
 

From c75e0b72143b96cf18096ef4c828025bc5b4544f Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sun, 26 May 2024 11:27:13 +0800
Subject: [PATCH 11/12] fix test

---
 pandas/tests/window/test_numba.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index 650eb911e410b..62e17db595985 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -304,7 +304,7 @@ def f(x):
 
 @td.skip_if_no("numba")
 def test_invalid_kwargs_nopython():
-    with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"):
+    with pytest.raises(NumbaUtilError, match="numba does not support keyword-only arguments"):
         Series(range(1)).rolling(1).apply(
             lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
         )

From ceb817836778842d4317ab8969dab4609825d377 Mon Sep 17 00:00:00 2001
From: auderson <auderson@qq.com>
Date: Sun, 26 May 2024 11:31:38 +0800
Subject: [PATCH 12/12] fix pre-commit

---
 pandas/tests/window/test_numba.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index 62e17db595985..c743df859337b 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -304,7 +304,9 @@ def f(x):
 
 @td.skip_if_no("numba")
 def test_invalid_kwargs_nopython():
-    with pytest.raises(NumbaUtilError, match="numba does not support keyword-only arguments"):
+    with pytest.raises(
+        NumbaUtilError, match="numba does not support keyword-only arguments"
+    ):
         Series(range(1)).rolling(1).apply(
             lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
         )