From 3a5fc90e8e2e7fa590d081f076b4df1676d6dd06 Mon Sep 17 00:00:00 2001 From: auderson Date: Sat, 18 May 2024 12:30:35 +0800 Subject: [PATCH 01/12] add *args for raw numba apply --- pandas/core/_numba/executor.py | 8 ++++---- pandas/core/apply.py | 7 +++---- pandas/tests/apply/test_frame_apply.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py index 0a26acb7df60a..9935ed5df5afa 100644 --- a/pandas/core/_numba/executor.py +++ b/pandas/core/_numba/executor.py @@ -24,7 +24,7 @@ def generate_apply_looper(func, nopython=True, nogil=True, parallel=False): nb_compat_func = numba.extending.register_jitable(func) @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) - def nb_looper(values, axis): + def nb_looper(values, axis, *args): # Operate on the first row/col in order to get # the output shape if axis == 0: @@ -33,7 +33,7 @@ def nb_looper(values, axis): else: first_elem = values[0] dim0 = values.shape[0] - res0 = nb_compat_func(first_elem) + res0 = nb_compat_func(first_elem, *args) # Use np.asarray to get shape for # https://github.com/numba/numba/issues/4202#issuecomment-1185981507 buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape @@ -44,11 +44,11 @@ def nb_looper(values, axis): if axis == 1: buff[0] = res0 for i in numba.prange(1, values.shape[0]): - buff[i] = nb_compat_func(values[i]) + buff[i] = nb_compat_func(values[i], *args) else: buff[:, 0] = res0 for j in numba.prange(1, values.shape[1]): - buff[:, j] = nb_compat_func(values[:, j]) + buff[:, j] = nb_compat_func(values[:, j], *args) return buff return nb_looper diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 32e8aea7ea8ab..24da3850152b0 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -51,6 +51,7 @@ from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.util.numba_ import get_jit_arguments if TYPE_CHECKING: from collections.abc import ( @@ -972,17 +973,15 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": - engine_kwargs = {} if engine_kwargs is None else engine_kwargs - # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has # incompatible type "Callable[..., Any] | str | list[Callable # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | # list[Callable[..., Any] | str]]"; expected "Hashable" nb_looper = generate_apply_looper( self.func, # type: ignore[arg-type] - **engine_kwargs, + **get_jit_arguments(engine_kwargs, self.kwargs), ) - result = nb_looper(self.values, self.axis) + result = nb_looper(self.values, self.axis, *self.args) # If we made the result 2-D, squeeze it back to 1-D result = np.squeeze(result) else: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index cbc68265a1cc1..27f0779ebef5f 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1718,3 +1718,19 @@ def test_agg_dist_like_and_nonunique_columns(): result = df.agg({"A": "count"}) expected = df["A"].count() tm.assert_series_equal(result, expected) + + +def test_numba_raw_apply_with_args(): + # GH:58712 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine="numba", raw=True) + # note: + # result is always float dtype, see core._numba.executor.py:generate_apply_looper + expected = df + 3.0 + tm.assert_frame_equal(result, expected) + + with pytest.raises( + pd.errors.NumbaUtilError, + match="numba does not support kwargs with nopython=True", + ): + df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine="numba", raw=True) From 3165efe07f265af5a5f38b0f6e7808167e7cffd9 Mon Sep 17 00:00:00 2001 From: auderson Date: Sat, 18 May 2024 13:52:55 +0800 Subject: [PATCH 02/12] add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 731406394ed46..7b39104bb2c66 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -498,6 +498,7 @@ Other - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) +- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` and ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) From de8957467eb3ec077a320f294f36fff9d2359a34 Mon Sep 17 00:00:00 2001 From: auderson Date: Sat, 18 May 2024 14:16:16 +0800 Subject: [PATCH 03/12] fix test_case --- pandas/tests/apply/test_frame_apply.py | 27 +++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 27f0779ebef5f..ca921d98088d2 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1720,17 +1720,18 @@ def test_agg_dist_like_and_nonunique_columns(): tm.assert_series_equal(result, expected) -def test_numba_raw_apply_with_args(): - # GH:58712 - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine="numba", raw=True) - # note: - # result is always float dtype, see core._numba.executor.py:generate_apply_looper - expected = df + 3.0 - tm.assert_frame_equal(result, expected) +def test_numba_raw_apply_with_args(engine): + if engine == "numba": + # GH:58712 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True) + # note: + # result is always float dtype, see core._numba.executor.py:generate_apply_looper + expected = df + 3.0 + tm.assert_frame_equal(result, expected) - with pytest.raises( - pd.errors.NumbaUtilError, - match="numba does not support kwargs with nopython=True", - ): - df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine="numba", raw=True) + with pytest.raises( + pd.errors.NumbaUtilError, + match="numba does not support kwargs with nopython=True", + ): + df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=True) From 3f13b303114b7a1fe2671b58327e1cf0a29fe2a3 Mon Sep 17 00:00:00 2001 From: auderson Date: Sat, 18 May 2024 14:19:50 +0800 Subject: [PATCH 04/12] fix pre-commit --- pandas/tests/apply/test_frame_apply.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index ca921d98088d2..de2d15c3760de 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1724,9 +1724,11 @@ def test_numba_raw_apply_with_args(engine): if engine == "numba": # GH:58712 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True) - # note: - # result is always float dtype, see core._numba.executor.py:generate_apply_looper + result = df.apply( + lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True + ) + # note: result is always float dtype, + # see core._numba.executor.py:generate_apply_looper expected = df + 3.0 tm.assert_frame_equal(result, expected) From c0268459694c02662f1e263625e0197212b486f5 Mon Sep 17 00:00:00 2001 From: auderson Date: Sat, 18 May 2024 22:11:57 +0800 Subject: [PATCH 05/12] fix test case --- pandas/tests/apply/test_frame_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index de2d15c3760de..3ff156cac3e35 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -64,7 +64,7 @@ def test_apply(float_frame, engine, request): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("raw", [True, False]) def test_apply_args(float_frame, axis, raw, engine, request): - if engine == "numba": + if engine == "numba" and raw is False: mark = pytest.mark.xfail(reason="numba engine doesn't support args") request.node.add_marker(mark) result = float_frame.apply( From 96581a316ad9e3d6acb62e724af9cb2c273d9fb2 Mon Sep 17 00:00:00 2001 From: auderson Date: Sun, 19 May 2024 11:53:23 +0800 Subject: [PATCH 06/12] add *args for raw=False as well; merge tests together --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/apply.py | 18 ++++++++------ pandas/tests/apply/test_frame_apply.py | 34 +++++++++----------------- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7b39104bb2c66..7cb6af877be42 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -498,7 +498,7 @@ Other - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) -- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` and ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) +- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 24da3850152b0..bfc268047b8da 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1122,21 +1122,22 @@ def generate_numba_apply_func( # Currently the parallel argument doesn't get passed through here # (it's disabled) since the dicts in numba aren't thread-safe. @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel) - def numba_func(values, col_names, df_index): + def numba_func(values, col_names, df_index, *args): results = {} for j in range(values.shape[1]): # Create the series ser = Series( values[:, j], index=df_index, name=maybe_cast_str(col_names[j]) ) - results[j] = jitted_udf(ser) + results[j] = jitted_udf(ser, *args) return results return numba_func def apply_with_numba(self) -> dict[int, Any]: nb_func = self.generate_numba_apply_func( - cast(Callable, self.func), **self.engine_kwargs + cast(Callable, self.func), + **get_jit_arguments(self.engine_kwargs, self.kwargs), ) from pandas.core._numba.extensions import set_numba_data @@ -1151,7 +1152,7 @@ def apply_with_numba(self) -> dict[int, Any]: # Convert from numba dict to regular dict # Our isinstance checks in the df constructor don't pass for numbas typed dict with set_numba_data(index) as index, set_numba_data(columns) as columns: - res = dict(nb_func(self.values, columns, index)) + res = dict(nb_func(self.values, columns, index, *self.args)) return res @property @@ -1259,7 +1260,7 @@ def generate_numba_apply_func( jitted_udf = numba.extending.register_jitable(func) @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel) - def numba_func(values, col_names_index, index): + def numba_func(values, col_names_index, index, *args): results = {} # Currently the parallel argument doesn't get passed through here # (it's disabled) since the dicts in numba aren't thread-safe. @@ -1271,7 +1272,7 @@ def numba_func(values, col_names_index, index): index=col_names_index, name=maybe_cast_str(index[i]), ) - results[i] = jitted_udf(ser) + results[i] = jitted_udf(ser, *args) return results @@ -1279,7 +1280,8 @@ def numba_func(values, col_names_index, index): def apply_with_numba(self) -> dict[int, Any]: nb_func = self.generate_numba_apply_func( - cast(Callable, self.func), **self.engine_kwargs + cast(Callable, self.func), + **get_jit_arguments(self.engine_kwargs, self.kwargs), ) from pandas.core._numba.extensions import set_numba_data @@ -1290,7 +1292,7 @@ def apply_with_numba(self) -> dict[int, Any]: set_numba_data(self.obj.index) as index, set_numba_data(self.columns) as columns, ): - res = dict(nb_func(self.values, columns, index)) + res = dict(nb_func(self.values, columns, index, *self.args)) return res diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 3ff156cac3e35..32a1ed596380a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -63,16 +63,23 @@ def test_apply(float_frame, engine, request): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("raw", [True, False]) -def test_apply_args(float_frame, axis, raw, engine, request): - if engine == "numba" and raw is False: - mark = pytest.mark.xfail(reason="numba engine doesn't support args") - request.node.add_marker(mark) +def test_apply_args(float_frame, axis, raw, engine): + # GH:58712 result = float_frame.apply( lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine ) expected = float_frame + 1 tm.assert_frame_equal(result, expected) + if engine == "numba": + with pytest.raises( + pd.errors.NumbaUtilError, + match="numba does not support kwargs with nopython=True", + ): + float_frame.apply( + lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + ) + def test_apply_categorical_func(): # GH 9573 @@ -1718,22 +1725,3 @@ def test_agg_dist_like_and_nonunique_columns(): result = df.agg({"A": "count"}) expected = df["A"].count() tm.assert_series_equal(result, expected) - - -def test_numba_raw_apply_with_args(engine): - if engine == "numba": - # GH:58712 - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - result = df.apply( - lambda x, a, b: x + a + b, args=(1, 2), engine=engine, raw=True - ) - # note: result is always float dtype, - # see core._numba.executor.py:generate_apply_looper - expected = df + 3.0 - tm.assert_frame_equal(result, expected) - - with pytest.raises( - pd.errors.NumbaUtilError, - match="numba does not support kwargs with nopython=True", - ): - df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=True) From 2aae933f96a9e11c10f8a98f71f49d9b565fd9be Mon Sep 17 00:00:00 2001 From: auderson Date: Tue, 21 May 2024 09:21:25 +0800 Subject: [PATCH 07/12] add prepare_function_arguments --- pandas/core/_numba/executor.py | 4 ++- pandas/core/apply.py | 20 ++++++++---- pandas/core/util/numba_.py | 45 ++++++++++++++++++++++++++ pandas/tests/apply/test_frame_apply.py | 19 +++++++++-- 4 files changed, 78 insertions(+), 10 deletions(-) diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py index 9935ed5df5afa..82fd4e34ac67b 100644 --- a/pandas/core/_numba/executor.py +++ b/pandas/core/_numba/executor.py @@ -14,6 +14,8 @@ from pandas.compat._optional import import_optional_dependency +from pandas.core.util.numba_ import jit_user_function + @functools.cache def generate_apply_looper(func, nopython=True, nogil=True, parallel=False): @@ -21,7 +23,7 @@ def generate_apply_looper(func, nopython=True, nogil=True, parallel=False): import numba else: numba = import_optional_dependency("numba") - nb_compat_func = numba.extending.register_jitable(func) + nb_compat_func = jit_user_function(func) @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) def nb_looper(values, axis, *args): diff --git a/pandas/core/apply.py b/pandas/core/apply.py index bfc268047b8da..7137fc8d71c71 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -51,7 +51,10 @@ from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike -from pandas.core.util.numba_ import get_jit_arguments +from pandas.core.util.numba_ import ( + get_jit_arguments, + prepare_function_arguments, +) if TYPE_CHECKING: from collections.abc import ( @@ -973,15 +976,16 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": + args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has # incompatible type "Callable[..., Any] | str | list[Callable # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | # list[Callable[..., Any] | str]]"; expected "Hashable" nb_looper = generate_apply_looper( self.func, # type: ignore[arg-type] - **get_jit_arguments(engine_kwargs, self.kwargs), + **get_jit_arguments(engine_kwargs, kwargs), ) - result = nb_looper(self.values, self.axis, *self.args) + result = nb_looper(self.values, self.axis, *args) # If we made the result 2-D, squeeze it back to 1-D result = np.squeeze(result) else: @@ -1135,9 +1139,10 @@ def numba_func(values, col_names, df_index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: + args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) nb_func = self.generate_numba_apply_func( cast(Callable, self.func), - **get_jit_arguments(self.engine_kwargs, self.kwargs), + **get_jit_arguments(self.engine_kwargs, kwargs), ) from pandas.core._numba.extensions import set_numba_data @@ -1152,7 +1157,7 @@ def apply_with_numba(self) -> dict[int, Any]: # Convert from numba dict to regular dict # Our isinstance checks in the df constructor don't pass for numbas typed dict with set_numba_data(index) as index, set_numba_data(columns) as columns: - res = dict(nb_func(self.values, columns, index, *self.args)) + res = dict(nb_func(self.values, columns, index, *args)) return res @property @@ -1279,9 +1284,10 @@ def numba_func(values, col_names_index, index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: + args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) nb_func = self.generate_numba_apply_func( cast(Callable, self.func), - **get_jit_arguments(self.engine_kwargs, self.kwargs), + **get_jit_arguments(self.engine_kwargs, kwargs), ) from pandas.core._numba.extensions import set_numba_data @@ -1292,7 +1298,7 @@ def apply_with_numba(self) -> dict[int, Any]: set_numba_data(self.obj.index) as index, set_numba_data(self.columns) as columns, ): - res = dict(nb_func(self.values, columns, index, *self.args)) + res = dict(nb_func(self.values, columns, index, *args)) return res diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index a6079785e7475..da02c4b5ccf34 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -2,6 +2,7 @@ from __future__ import annotations +import inspect import types from typing import ( TYPE_CHECKING, @@ -97,3 +98,47 @@ def jit_user_function(func: Callable) -> Callable: numba_func = numba.extending.register_jitable(func) return numba_func + + +_sentinel = object() + + +def prepare_function_arguments( + func: Callable, args: tuple, kwargs: dict +) -> tuple[tuple, dict]: + """ + Prepare arguments for jitted function. As numba functions do not support kwargs, + we try to move kwargs into args if possible. + + Parameters + ---------- + func : function + user defined function + args : tuple + user input positional arguments + kwargs : dict + user input keyword arguments + + Returns + ------- + tuple[tuple, dict] + args, kwargs + + """ + if not kwargs: + return args, kwargs + + # the udf should have this pattern: def udf(value, *args, **kwargs):... + signature = inspect.signature(func) + arguments = signature.bind(_sentinel, *args, **kwargs) + arguments.apply_defaults() + # Ref: https://peps.python.org/pep-0362/ + # Arguments which could be passed as part of either *args or **kwargs + # will be included only in the BoundArguments.args attribute. + args = arguments.args + kwargs = arguments.kwargs + + assert args[0] is _sentinel + args = args[1:] + + return args, kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 32a1ed596380a..003c3594a9441 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -64,20 +64,35 @@ def test_apply(float_frame, engine, request): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("raw", [True, False]) def test_apply_args(float_frame, axis, raw, engine): - # GH:58712 result = float_frame.apply( lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine ) expected = float_frame + 1 tm.assert_frame_equal(result, expected) + # GH:58712 + result = float_frame.apply( + lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + ) + expected = float_frame + 3 + tm.assert_frame_equal(result, expected) + if engine == "numba": + # keyword-only arguments are not supported in numba + with pytest.raises( + pd.errors.NumbaUtilError, + match="numba does not support kwargs with nopython=True", + ): + float_frame.apply( + lambda x, a, *, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + ) + with pytest.raises( pd.errors.NumbaUtilError, match="numba does not support kwargs with nopython=True", ): float_frame.apply( - lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + lambda *x, b: x[0] + x[1] + b, args=(1,), b=2, engine=engine, raw=raw ) From 1a6f1aeeba772e4fe6382df2306e4b5d93bf21a3 Mon Sep 17 00:00:00 2001 From: auderson Date: Tue, 21 May 2024 10:45:39 +0800 Subject: [PATCH 08/12] fix mypy --- pandas/core/apply.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7137fc8d71c71..4500d4e8a50be 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -74,7 +74,6 @@ from pandas.core.resample import Resampler from pandas.core.window.rolling import BaseWindow - ResType = dict[int, Any] @@ -976,7 +975,11 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": - args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) + args, kwargs = prepare_function_arguments( + self.func, # type: ignore[arg-type] + self.args, + self.kwargs, + ) # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has # incompatible type "Callable[..., Any] | str | list[Callable # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | @@ -1139,10 +1142,10 @@ def numba_func(values, col_names, df_index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: - args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) + func = cast(Callable, self.func) + args, kwargs = prepare_function_arguments(func, self.args, self.kwargs) nb_func = self.generate_numba_apply_func( - cast(Callable, self.func), - **get_jit_arguments(self.engine_kwargs, kwargs), + func, **get_jit_arguments(self.engine_kwargs, kwargs) ) from pandas.core._numba.extensions import set_numba_data @@ -1284,10 +1287,10 @@ def numba_func(values, col_names_index, index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: - args, kwargs = prepare_function_arguments(self.func, self.args, self.kwargs) + func = cast(Callable, self.func) + args, kwargs = prepare_function_arguments(func, self.args, self.kwargs) nb_func = self.generate_numba_apply_func( - cast(Callable, self.func), - **get_jit_arguments(self.engine_kwargs, kwargs), + func, **get_jit_arguments(self.engine_kwargs, kwargs) ) from pandas.core._numba.extensions import set_numba_data From 8925b3a307d0d5667a10b5b10b4f205afca11896 Mon Sep 17 00:00:00 2001 From: auderson Date: Sun, 26 May 2024 10:31:21 +0800 Subject: [PATCH 09/12] update get_jit_arguments --- pandas/core/util/numba_.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index da02c4b5ccf34..d93984d210cb4 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -55,10 +55,15 @@ def get_jit_arguments( engine_kwargs = {} nopython = engine_kwargs.get("nopython", True) - if kwargs and nopython: + if kwargs: + # Note: in case numba supports keyword-only arguments in + # a future version, we should remove this check. But this + # seems unlikely to happen soon. + raise NumbaUtilError( - "numba does not support kwargs with nopython=True: " - "https://github.com/numba/numba/issues/2916" + "numba does not support keyword-only arguments" + "https://github.com/numba/numba/issues/2916, " + "https://github.com/numba/numba/issues/6846" ) nogil = engine_kwargs.get("nogil", False) parallel = engine_kwargs.get("parallel", False) From 085ae73f68a4c5e4597f542a70bd132af39103f1 Mon Sep 17 00:00:00 2001 From: auderson Date: Sun, 26 May 2024 10:31:44 +0800 Subject: [PATCH 10/12] add nopython test in `test_apply_args` --- pandas/tests/apply/test_frame_apply.py | 36 +++++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 003c3594a9441..939997f44c1a9 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -63,16 +63,28 @@ def test_apply(float_frame, engine, request): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("raw", [True, False]) -def test_apply_args(float_frame, axis, raw, engine): +@pytest.mark.parametrize("nopython", [True, False]) +def test_apply_args(float_frame, axis, raw, engine, nopython): + engine_kwargs = {"nopython": nopython} result = float_frame.apply( - lambda x, y: x + y, axis, args=(1,), raw=raw, engine=engine + lambda x, y: x + y, + axis, + args=(1,), + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, ) expected = float_frame + 1 tm.assert_frame_equal(result, expected) # GH:58712 result = float_frame.apply( - lambda x, a, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + lambda x, a, b: x + a + b, + args=(1,), + b=2, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, ) expected = float_frame + 3 tm.assert_frame_equal(result, expected) @@ -81,18 +93,28 @@ def test_apply_args(float_frame, axis, raw, engine): # keyword-only arguments are not supported in numba with pytest.raises( pd.errors.NumbaUtilError, - match="numba does not support kwargs with nopython=True", + match="numba does not support keyword-only arguments", ): float_frame.apply( - lambda x, a, *, b: x + a + b, args=(1,), b=2, engine=engine, raw=raw + lambda x, a, *, b: x + a + b, + args=(1,), + b=2, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, ) with pytest.raises( pd.errors.NumbaUtilError, - match="numba does not support kwargs with nopython=True", + match="numba does not support keyword-only arguments", ): float_frame.apply( - lambda *x, b: x[0] + x[1] + b, args=(1,), b=2, engine=engine, raw=raw + lambda *x, b: x[0] + x[1] + b, + args=(1,), + b=2, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, ) From c75e0b72143b96cf18096ef4c828025bc5b4544f Mon Sep 17 00:00:00 2001 From: auderson Date: Sun, 26 May 2024 11:27:13 +0800 Subject: [PATCH 11/12] fix test --- pandas/tests/window/test_numba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 650eb911e410b..62e17db595985 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -304,7 +304,7 @@ def f(x): @td.skip_if_no("numba") def test_invalid_kwargs_nopython(): - with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"): + with pytest.raises(NumbaUtilError, match="numba does not support keyword-only arguments"): Series(range(1)).rolling(1).apply( lambda x: x, kwargs={"a": 1}, engine="numba", raw=True ) From ceb817836778842d4317ab8969dab4609825d377 Mon Sep 17 00:00:00 2001 From: auderson Date: Sun, 26 May 2024 11:31:38 +0800 Subject: [PATCH 12/12] fix pre-commit --- pandas/tests/window/test_numba.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 62e17db595985..c743df859337b 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -304,7 +304,9 @@ def f(x): @td.skip_if_no("numba") def test_invalid_kwargs_nopython(): - with pytest.raises(NumbaUtilError, match="numba does not support keyword-only arguments"): + with pytest.raises( + NumbaUtilError, match="numba does not support keyword-only arguments" + ): Series(range(1)).rolling(1).apply( lambda x: x, kwargs={"a": 1}, engine="numba", raw=True )