From 4524a9b15f668254089752393dc4f9b631b94e4c Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Tue, 12 Jan 2021 22:08:27 -0500 Subject: [PATCH 1/3] ENH: Allow Series.apply to accept list-like and dict-like --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/aggregation.py | 3 +- pandas/core/apply.py | 35 ++++++--- .../tests/series/apply/test_series_apply.py | 73 ++++++++++++++++++- 4 files changed, 99 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index db2e2ba3a2e1e..86f14c57f29c2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -56,6 +56,7 @@ Other enhancements - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`) - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) +- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 72a98da44428b..2145551833e90 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -704,7 +704,8 @@ def agg_dict_like( # if we have a dict of any non-scalars # eg. {'A' : ['mean']}, normalize all to # be list-likes - if any(is_aggregator(x) for x in arg.values()): + # Cannot use arg.values() because arg may be a Series + if any(is_aggregator(x) for _, x in arg.items()): new_arg: AggFuncTypeDict = {} for k, v in arg.items(): if not isinstance(v, (tuple, list, dict)): diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f3e759610b784..32f89d531b590 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -195,6 +195,24 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: self.kwds["axis"] = self.axis return self.obj._try_aggregate_string_function(f, *self.args, **self.kwds) + def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: + """ + Compute apply in case of a list-like or dict-like. + + Returns + ------- + result: Series, DataFrame, or None + Result when self.f is a list-like or dict-like, None otherwise. + """ + # Note: dict-likes are list-like + if not is_list_like(self.f): + return None + # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets + # multiple values for keyword argument "axis" + return self.obj.aggregate( # type: ignore[misc] + self.f, axis=self.axis, *self.args, **self.kwds + ) + class FrameApply(Apply): obj: DataFrame @@ -248,12 +266,9 @@ def agg_axis(self) -> Index: def apply(self) -> FrameOrSeriesUnion: """ compute the results """ # dispatch to agg - if is_list_like(self.f) or is_dict_like(self.f): - # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets - # multiple values for keyword argument "axis" - return self.obj.aggregate( # type: ignore[misc] - self.f, axis=self.axis, *self.args, **self.kwds - ) + result = self.maybe_apply_multiple() + if result is not None: + return result # all empty if len(self.columns) == 0 and len(self.index) == 0: @@ -587,16 +602,14 @@ def __init__( def apply(self) -> FrameOrSeriesUnion: obj = self.obj - func = self.f - args = self.args - kwds = self.kwds if len(obj) == 0: return self.apply_empty_result() # dispatch to agg - if isinstance(func, (list, dict)): - return obj.aggregate(func, *args, **kwds) + result = self.maybe_apply_multiple() + if result is not None: + return result # if we are a string, try to dispatch result = self.maybe_apply_str() diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index b8c6291415ef7..57d50427c190f 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -7,7 +7,7 @@ from pandas.core.dtypes.common import is_number import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, isna, timedelta_range +from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, timedelta_range import pandas._testing as tm from pandas.core.base import SpecificationError @@ -827,3 +827,74 @@ def test_apply_to_timedelta(self): b = Series(list_of_strings).apply(pd.to_timedelta) # noqa # Can't compare until apply on a Series gives the correct dtype # assert_series_equal(a, b) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sum], ["sum"]), + ([np.sum, np.mean], ["sum", "mean"]), + (np.array([np.sum]), ["sum"]), + (np.array([np.sum, np.mean]), ["sum", "mean"]), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_listlike_reducer(string_series, ops, names, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in zip(names, ops)}) + expected.name = "series" + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sum}, + {"A": np.sum, "B": np.mean}, + Series({"A": np.sum}), + Series({"A": np.sum, "B": np.mean}), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_dictlike_reducer(string_series, ops, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_apply_listlike_transformer(string_series, ops, names): + # GH 39140 + with np.errstate(all="ignore"): + expected = concat([op(string_series) for op in ops], axis=1) + expected.columns = names + result = string_series.apply(ops) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sqrt}, + {"A": np.sqrt, "B": np.exp}, + Series({"A": np.sqrt}), + Series({"A": np.sqrt, "B": np.exp}), + ], +) +def test_apply_dictlike_transformer(string_series, ops): + # GH 39140 + expected = concat({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = string_series.apply(ops) + tm.assert_series_equal(result, expected) From c95db302dc00eac8f9f38a390ec8377cecc2f261 Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 13 Jan 2021 20:30:00 -0500 Subject: [PATCH 2/3] Fix call to aggregate --- pandas/core/apply.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 32f89d531b590..f7c7220985138 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -207,11 +207,7 @@ def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: # Note: dict-likes are list-like if not is_list_like(self.f): return None - # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets - # multiple values for keyword argument "axis" - return self.obj.aggregate( # type: ignore[misc] - self.f, axis=self.axis, *self.args, **self.kwds - ) + return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwds) class FrameApply(Apply): From 59443f9aaadfec543df3d1d6d245ea44e23b88a7 Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 13 Jan 2021 21:44:04 -0500 Subject: [PATCH 3/3] with np.errstate for sqrt test --- pandas/tests/series/apply/test_series_apply.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 57d50427c190f..a5c40af5c7f35 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -894,7 +894,8 @@ def test_apply_listlike_transformer(string_series, ops, names): ) def test_apply_dictlike_transformer(string_series, ops): # GH 39140 - expected = concat({name: op(string_series) for name, op in ops.items()}) - expected.name = string_series.name - result = string_series.apply(ops) - tm.assert_series_equal(result, expected) + with np.errstate(all="ignore"): + expected = concat({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = string_series.apply(ops) + tm.assert_series_equal(result, expected)