Skip to content

Commit 944e35d

Browse files
committed
fixups
1 parent cfd5fa4 commit 944e35d

File tree

10 files changed

+131 / -65 lines changed

10 files changed

+131 / -65 lines changed

pandas/core/apply.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -439,7 +439,12 @@ def new_list_like(self, method: str) -> DataFrame | Series:
439439
result_dim = None
440440

441441
for a in arg:
442+
name = None
442443
try:
444+
if isinstance(a, (tuple, list)):
445+
# Handle name, value pairs
446+
name = a[0]
447+
a = a[1]
443448
new_res = getattr(obj, method)(a)
444449
if result_dim is None:
445450
result_dim = getattr(new_res, "ndim", 0)
@@ -453,15 +458,15 @@ def new_list_like(self, method: str) -> DataFrame | Series:
453458
results.append(new_res)
454459

455460
# make sure we find a good name
456-
name = com.get_callable_name(a) or a
461+
if name is None:
462+
name = com.get_callable_name(a) or a
457463
keys.append(name)
458464

459465
# if we are empty
460466
if not len(results):
461467
raise ValueError("no results")
462468

463469
try:
464-
465470
concatenated = concat(results, keys=keys, axis=1, sort=False)
466471
except TypeError:
467472
# we are concatting non-NDFrame objects,
@@ -567,6 +572,7 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
567572
-------
568573
Result of aggregation.
569574
"""
575+
from pandas import Index
570576
from pandas.core.reshape.concat import concat
571577

572578
obj = self.obj
@@ -586,16 +592,16 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
586592
# key only used for output
587593
colg = obj._gotitem(selection, ndim=1)
588594
results = {key: getattr(colg, method)(how) for key, how in arg.items()}
595+
589596
else:
590597
# key used for column selection and output
591-
results = [
592-
# ndim = 2 for groupby; act like we always have multiple columns
593-
getattr(obj._gotitem(key, ndim=2), method)(how)
598+
results = {
599+
key: getattr(obj._gotitem(key, ndim=1), method)(how)
594600
for key, how in arg.items()
595-
]
601+
}
596602
if self.renamer is not None:
597-
for idx, columns in enumerate(self.renamer.values()):
598-
results[idx].columns = columns
603+
for key, columns in self.renamer.items():
604+
results[key].columns = columns
599605

600606
# Avoid making two isinstance calls in all and any below
601607
if isinstance(results, dict):
@@ -605,7 +611,15 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
605611

606612
# combine results
607613
if all(is_ndframe):
608-
result = concat(results, axis=1)
614+
keys_to_use = [k for k in arg.keys() if not results[k].empty]
615+
keys_to_use = keys_to_use if keys_to_use != [] else arg.keys()
616+
if selected_obj.ndim == 2:
617+
# keys are columns, so we can preserve names
618+
ktu = Index(keys_to_use)
619+
ktu._set_names(selected_obj.columns.names)
620+
keys_to_use = ktu
621+
keys = None if selected_obj.ndim == 1 else keys_to_use
622+
result = concat({k: results[k] for k in keys_to_use}, keys=keys, axis=1)
609623
if result.ndim == 1:
610624
result = result.to_frame()
611625
elif any(is_ndframe):

pandas/core/groupby/generic.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -888,8 +888,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
888888
relabeling, func, columns, order, _ = reconstruct_func(func, **kwargs)
889889
func = maybe_mangle_lambdas(func)
890890

891-
op = GroupByApply(self, func, args, kwargs)
892-
result = op.agg()
891+
with group_selection_context(self):
892+
op = GroupByApply(self, func, args, kwargs)
893+
result = op.agg()
893894
if not is_dict_like(func) and result is not None:
894895
return result
895896
elif relabeling and result is not None:
@@ -905,7 +906,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
905906
# can't return early
906907
result = self._aggregate_frame(func, *args, **kwargs)
907908

908-
elif self.axis == 1:
909+
elif self.axis == 1 and self.grouper.nkeys == 1:
909910
# _aggregate_multiple_funcs does not allow self.axis == 1
910911
# Note: axis == 1 precludes 'not self.as_index', see __init__
911912
result = self._aggregate_frame(func)
@@ -932,12 +933,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
932933
return result
933934

934935
else:
935-
936936
# try to treat as if we are passing a list
937-
if get_option("new_udf_methods"):
938-
gba = GroupByApply(self, func, args=(), kwargs={})
939-
else:
940-
gba = GroupByApply(self, [func], args=(), kwargs={})
937+
gba = GroupByApply(self, [func], args=(), kwargs={})
941938
try:
942939
result = gba.agg()
943940
if get_option("new_udf_methods") and result is None:

pandas/tests/apply/test_frame_apply.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1140,8 +1140,6 @@ def test_agg_multiple_mixed_no_warning():
11401140
else:
11411141
expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
11421142
tm.assert_frame_equal(result, expected)
1143-
if get_option("mode.new_udf_methods"):
1144-
assert False
11451143

11461144

11471145
def test_agg_reduce(axis, float_frame):

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -328,6 +328,10 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
328328
)
329329
# PerformanceWarning is thrown by `assert col in right` in assert_frame_equal
330330
with tm.assert_produces_warning(PerformanceWarning):
331+
print("here!")
332+
print(result.head())
333+
print("---")
334+
print(expected.head())
331335
tm.assert_frame_equal(result, expected)
332336

333337

@@ -500,12 +504,18 @@ def test_order_aggregate_multiple_funcs():
500504
# GH 25692
501505
df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
502506

503-
res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
504-
result = res.columns.levels[1]
507+
if get_option("new_udf_methods"):
508+
# TODO (GH 35725): This will not raise when agg-must-agg is implemented
509+
msg = "Cannot concat indices that do not have the same number of levels"
510+
with pytest.raises(AssertionError, match=msg):
511+
df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
512+
else:
513+
res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
514+
result = res.columns.levels[1]
505515

506-
expected = Index(["sum", "max", "mean", "ohlc", "min"])
516+
expected = Index(["sum", "max", "mean", "ohlc", "min"])
507517

508-
tm.assert_index_equal(result, expected)
518+
tm.assert_index_equal(result, expected)
509519

510520

511521
@pytest.mark.parametrize("dtype", [np.int64, np.uint64])
@@ -846,19 +856,11 @@ def test_groupby_aggregate_empty_key(kwargs):
846856
# GH: 32580
847857
df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]})
848858
result = df.groupby("a").agg(kwargs)
849-
print(result)
850-
if get_option("new_udf_methods"):
851-
expected = DataFrame(
852-
[1, 4],
853-
index=Index([1, 2], dtype="int64", name="a"),
854-
columns=MultiIndex.from_tuples([["min", "c"]]),
855-
)
856-
else:
857-
expected = DataFrame(
858-
[1, 4],
859-
index=Index([1, 2], dtype="int64", name="a"),
860-
columns=MultiIndex.from_tuples([["c", "min"]]),
861-
)
859+
expected = DataFrame(
860+
[1, 4],
861+
index=Index([1, 2], dtype="int64", name="a"),
862+
columns=MultiIndex.from_tuples([["c", "min"]]),
863+
)
862864
tm.assert_frame_equal(result, expected)
863865

864866

@@ -1219,7 +1221,10 @@ def test_nonagg_agg():
12191221
g = df.groupby("a")
12201222

12211223
result = g.agg(["cumsum"])
1222-
result.columns = result.columns.droplevel(-1)
1224+
if get_option("new_udf_methods"):
1225+
result.columns = result.columns.droplevel(0)
1226+
else:
1227+
result.columns = result.columns.droplevel(-1)
12231228
expected = g.agg("cumsum")
12241229

12251230
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/aggregate/test_other.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas._config import get_option
12+
1113
import pandas.util._test_decorators as td
1214

1315
import pandas as pd
@@ -201,13 +203,21 @@ def test_aggregate_api_consistency():
201203
tm.assert_frame_equal(result, expected, check_like=True)
202204

203205
result = grouped.agg([np.sum, np.mean])
204-
expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
205-
expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
206+
if get_option("new_udf_methods"):
207+
expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1)
208+
expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]])
209+
else:
210+
expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
211+
expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
206212
tm.assert_frame_equal(result, expected, check_like=True)
207213

208214
result = grouped[["D", "C"]].agg([np.sum, np.mean])
209-
expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
210-
expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
215+
if get_option("new_udf_methods"):
216+
expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
217+
expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]])
218+
else:
219+
expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
220+
expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
211221
tm.assert_frame_equal(result, expected, check_like=True)
212222

213223
result = grouped.agg({"C": "mean", "D": "sum"})
@@ -393,7 +403,10 @@ def P1(a):
393403
g = df.groupby("date")
394404

395405
expected = g.agg([P1])
396-
expected.columns = expected.columns.levels[0]
406+
if get_option("new_udf_methods"):
407+
expected.columns = expected.columns.levels[1]
408+
else:
409+
expected.columns = expected.columns.levels[0]
397410

398411
result = g.agg(P1)
399412
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_groupby.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1996,9 +1996,14 @@ def test_groupby_agg_ohlc_non_first():
19961996
index=date_range("2018-01-01", periods=2, freq="D", name="dti"),
19971997
)
19981998

1999-
result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
2000-
2001-
tm.assert_frame_equal(result, expected)
1999+
if get_option("new_udf_methods"):
2000+
# TODO (GH 35725): This will not raise when agg-must-agg is implemented
2001+
msg = "Cannot concat indices that do not have the same number of levels"
2002+
with pytest.raises(AssertionError, match=msg):
2003+
df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
2004+
else:
2005+
result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
2006+
tm.assert_frame_equal(result, expected)
20022007

20032008

20042009
def test_groupby_multiindex_nat():

pandas/tests/resample/test_deprecated.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from pandas import (
1111
DataFrame,
1212
Series,
13+
get_option,
1314
)
1415
import pandas._testing as tm
1516
from pandas.core.indexes.datetimes import date_range
@@ -97,7 +98,10 @@ def test_resample_loffset_arg_type(frame, create_index, arg):
9798
result_agg = df.resample("2D", loffset="2H").agg(arg)
9899

99100
if isinstance(arg, list):
100-
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
101+
if get_option("new_udf_methods"):
102+
expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
103+
else:
104+
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
101105

102106
tm.assert_frame_equal(result_agg, expected)
103107

@@ -216,7 +220,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg):
216220
with tm.assert_produces_warning(FutureWarning):
217221
result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
218222
if isinstance(agg_arg, list):
219-
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
223+
if get_option("new_udf_methods"):
224+
expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
225+
else:
226+
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
220227
tm.assert_frame_equal(result_agg, expected)
221228

222229

pandas/tests/resample/test_resample_api.py

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
NamedAgg,
1010
Series,
11+
get_option,
1112
)
1213
import pandas._testing as tm
1314
from pandas.core.indexes.datetimes import date_range
@@ -347,15 +348,14 @@ def test_agg():
347348
b_std = r["B"].std()
348349
b_sum = r["B"].sum()
349350

350-
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
351-
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
351+
if get_option("new_udf_methods"):
352+
expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1)
353+
expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]])
354+
else:
355+
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
356+
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
352357
for t in cases:
353-
warn = FutureWarning if t in cases[1:3] else None
354-
with tm.assert_produces_warning(
355-
warn, match="Dropping invalid columns", check_stacklevel=False
356-
):
357-
# .var on dt64 column raises and is dropped
358-
result = t.aggregate([np.mean, np.std])
358+
result = t.aggregate([np.mean, np.std])
359359
tm.assert_frame_equal(result, expected)
360360

361361
expected = pd.concat([a_mean, b_std], axis=1)
@@ -628,11 +628,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name):
628628
columns=[col_name],
629629
)
630630
result = df.resample("1d").aggregate(["mean"])
631-
expected = DataFrame(
632-
[47.5, 143.5, 195.5],
633-
index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
634-
columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
635-
)
631+
if get_option("new_udf_methods"):
632+
expected = DataFrame(
633+
[47.5, 143.5, 195.5],
634+
index=date_range(
635+
start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
636+
),
637+
columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]),
638+
)
639+
else:
640+
expected = DataFrame(
641+
[47.5, 143.5, 195.5],
642+
index=date_range(
643+
start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
644+
),
645+
columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
646+
)
636647
tm.assert_frame_equal(result, expected)
637648

638649

pandas/tests/reshape/test_pivot.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas._config import get_option
12+
1113
import pandas as pd
1214
from pandas import (
1315
Categorical,
@@ -1905,8 +1907,14 @@ def test_pivot_margins_name_unicode(self):
19051907
frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek
19061908
)
19071909
index = Index([1, 2, 3, greek], dtype="object", name="foo")
1908-
expected = DataFrame(index=index)
1909-
tm.assert_frame_equal(table, expected)
1910+
1911+
if get_option("new_udf_methods"):
1912+
expected = Series([1, 1, 1, 3], index=index)
1913+
expected.index.name = None
1914+
tm.assert_series_equal(table, expected)
1915+
else:
1916+
expected = DataFrame(index=index)
1917+
tm.assert_frame_equal(table, expected)
19101918

19111919
def test_pivot_string_as_func(self):
19121920
# GH #18713

0 commit comments

Comments
 (0)