Skip to content

Commit 6f61d7b

Browse files
ENH: Adding engine parameter to Series.map
1 parent 8943c97 commit 6f61d7b

File tree

5 files changed

+93
-55
lines changed

5 files changed

+93
-55
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Other enhancements
7171
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7272
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7373
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
74+
- :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
7475
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
7576
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
7677
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)

pandas/core/series.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4322,6 +4322,7 @@ def map(
43224322
self,
43234323
arg: Callable | Mapping | Series,
43244324
na_action: Literal["ignore"] | None = None,
4325+
engine: Callable | None = None,
43254326
**kwargs,
43264327
) -> Series:
43274328
"""
@@ -4338,6 +4339,23 @@ def map(
43384339
na_action : {None, 'ignore'}, default None
43394340
If 'ignore', propagate NaN values, without passing them to the
43404341
mapping correspondence.
4342+
engine : decorator, optional
4343+
Choose the execution engine to use. If not provided the function
4344+
will be executed by the regular Python interpreter.
4345+
4346+
Other options include JIT compilers such as Numba and Bodo, which in some
4347+
cases can speed up the execution. To use an executor you can provide
4348+
the decorators ``numba.jit``, ``numba.njit`` or ``bodo.jit``. You can
4349+
also provide the decorator with parameters, like ``numba.jit(nogil=True)``.
4350+
4351+
Not all functions can be executed with all execution engines. In general,
4352+
JIT compilers will require type stability in the function (no variable
4353+
should change data type during the execution). And not all pandas and
4354+
NumPy APIs are supported. Check the engine documentation [1]_ and [2]_
4355+
for limitations.
4356+
4357+
.. versionadded:: 3.0.0
4358+
43414359
**kwargs
43424360
Additional keyword arguments to pass as keyword arguments to
43434361
`arg`.
@@ -4404,6 +4422,18 @@ def map(
44044422
3 I am a rabbit
44054423
dtype: object
44064424
"""
4425+
if engine is not None:
4426+
if not hasattr(engine, "__pandas_udf__"):
4427+
raise ValueError(f"Not a valid engine: {engine}")
4428+
return engine.__pandas_udf__.map(
4429+
data=self,
4430+
func=arg,
4431+
args=(),
4432+
kwargs=kwargs,
4433+
decorator=engine,
4434+
skip_na=na_action == "ignore",
4435+
).__finalize__(self, method="map")
4436+
44074437
if callable(arg):
44084438
arg = functools.partial(arg, **kwargs)
44094439
new_values = self._map_values(arg, na_action=na_action)

pandas/tests/apply/common.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,66 @@
1+
import numpy as np
2+
3+
from pandas import (
4+
DataFrame,
5+
Series,
6+
)
7+
from pandas.api.executors import BaseExecutionEngine
18
from pandas.core.groupby.base import transformation_kernels
29

310
# There is no Series.cumcount or DataFrame.cumcount
411
series_transform_kernels = [
512
x for x in sorted(transformation_kernels) if x != "cumcount"
613
]
714
frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"]
15+
16+
17+
class MockExecutionEngine(BaseExecutionEngine):
    """
    Execution engine used to test that the execution engine interface
    receives and uses all parameters provided by the user.

    No extra functionality is implemented here: each method simply calls the
    same pandas ``map`` / ``apply`` method again, this time without an engine,
    so the default Python behavior is executed and results can be compared
    against a plain (engine-less) call.

    The methods take no ``self``: they are looked up and called on the class
    itself (``engine.__pandas_udf__.map(...)``), not on an instance.
    """

    def map(data, func, args, kwargs, decorator, skip_na):
        """
        Map ``func`` over ``data`` with the default pandas implementation.

        Parameters
        ----------
        data : Series or DataFrame
            Object whose own ``map`` method is called.
        func : callable
            Function to map.
        args : tuple
            Part of the engine interface; not used by this mock.
        kwargs : dict
            Keyword arguments for ``func``. Only forwarded when ``data`` is a
            DataFrame.
        decorator : object
            The engine decorator supplied by the user; not used by this mock.
        skip_na : bool
            If True, ``na_action="ignore"`` is passed so missing values are
            propagated without being passed to ``func``.

        Returns
        -------
        Whatever ``data.map`` returns.
        """
        # NOTE(review): kwargs are dropped for Series even though Series.map
        # accepts **kwargs -- kept as in the original; confirm this is intended.
        kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
        # The keyword is ``na_action`` and its only valid values are
        # None / "ignore". A misspelled name would silently land in **kwargs
        # and be handed to ``func`` instead of controlling NA handling.
        na_action = "ignore" if skip_na else None
        return data.map(func, na_action=na_action, **kwargs_to_pass)

    def apply(data, func, args, kwargs, decorator, axis):
        """
        Apply ``func`` to ``data`` with the default pandas/NumPy implementation.

        Parameters
        ----------
        data : Series, DataFrame or numpy.ndarray
            Object to apply the function to.
        func : callable
            Function to apply.
        args : tuple
            Positional arguments forwarded to ``func``.
        kwargs : dict
            Keyword arguments forwarded to ``func`` (not forwarded in the
            Series branch).
        decorator : object
            The engine decorator supplied by the user; not used by this mock.
        axis : int or str
            Axis forwarded to ``DataFrame.apply`` / ``np.apply_along_axis``.
            Ignored for Series.

        Returns
        -------
        Whatever the underlying ``apply`` call returns.
        """
        if isinstance(data, Series):
            # NOTE(review): ``convert_dtype`` may no longer be a Series.apply
            # parameter in pandas 3.0 -- confirm it is not being forwarded to
            # ``func`` as a keyword argument.
            return data.apply(func, convert_dtype=True, args=args, by_row=False)
        elif isinstance(data, DataFrame):
            return data.apply(
                func,
                axis=axis,
                raw=False,
                result_type=None,
                args=args,
                by_row="compat",
                **kwargs,
            )
        else:
            # Only raw NumPy arrays are expected past this point.
            assert isinstance(data, np.ndarray)

            def wrap_function(func):
                # Wrap str results in an object array before handing them to
                # np.apply_along_axis; see
                # https://github.com/numpy/numpy/issues/8352
                def wrapper(*args, **kwargs):
                    result = func(*args, **kwargs)
                    if isinstance(result, str):
                        result = np.array(result, dtype=object)
                    return result

                return wrapper

            return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)
63+
64+
65+
class MockEngineDecorator:
    # Stand-in for a third-party engine decorator passed as ``engine=``.
    # Series.map only accepts engines that expose ``__pandas_udf__`` and then
    # calls ``engine.__pandas_udf__.map(...)``; pointing the attribute at the
    # mock engine routes those calls back to the default pandas behavior.
    __pandas_udf__ = MockExecutionEngine

pandas/tests/apply/test_frame_apply.py

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,63 +17,11 @@
1717
date_range,
1818
)
1919
import pandas._testing as tm
20-
from pandas.api.executors import BaseExecutionEngine
20+
from pandas.tests.apply.common import MockEngineDecorator
2121
from pandas.tests.frame.common import zip_frames
2222
from pandas.util.version import Version
2323

2424

25-
class MockExecutionEngine(BaseExecutionEngine):
26-
"""
27-
Execution Engine to test if the execution engine interface receives and
28-
uses all parameters provided by the user.
29-
30-
Making this engine work as the default Python engine by calling it, no extra
31-
functionality is implemented here.
32-
33-
When testing, this will be called when this engine is provided, and then the
34-
same pandas.map and pandas.apply function will be called, but without engine,
35-
executing the default behavior from the python engine.
36-
"""
37-
38-
def map(data, func, args, kwargs, decorator, skip_na):
39-
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
40-
return data.map(
41-
func, action_na="ignore" if skip_na else False, **kwargs_to_pass
42-
)
43-
44-
def apply(data, func, args, kwargs, decorator, axis):
45-
if isinstance(data, Series):
46-
return data.apply(func, convert_dtype=True, args=args, by_row=False)
47-
elif isinstance(data, DataFrame):
48-
return data.apply(
49-
func,
50-
axis=axis,
51-
raw=False,
52-
result_type=None,
53-
args=args,
54-
by_row="compat",
55-
**kwargs,
56-
)
57-
else:
58-
assert isinstance(data, np.ndarray)
59-
60-
def wrap_function(func):
61-
# https://github.com/numpy/numpy/issues/8352
62-
def wrapper(*args, **kwargs):
63-
result = func(*args, **kwargs)
64-
if isinstance(result, str):
65-
result = np.array(result, dtype=object)
66-
return result
67-
68-
return wrapper
69-
70-
return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)
71-
72-
73-
class MockEngineDecorator:
74-
__pandas_udf__ = MockExecutionEngine
75-
76-
7725
@pytest.fixture
7826
def int_frame_const_col():
7927
"""

pandas/tests/apply/test_series_apply.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,13 +376,13 @@ def test_demo():
376376

377377

378378
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
379-
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
379+
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row, engine):
380380
# test that we are evaluating row-by-row first if by_row="compat"
381381
# else vectorized evaluation
382382
result = string_series.apply(func, by_row=by_row)
383383

384384
if by_row:
385-
expected = string_series.map(func)
385+
expected = string_series.map(func, engine=engine)
386386
tm.assert_series_equal(result, expected)
387387
else:
388388
assert result == str(string_series)

0 commit comments

Comments
 (0)