fixups

rhshadrach · rhshadrach · commit 944e35d2ccab · 2021-09-20T17:52:46.000-04:00
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -439,7 +439,12 @@ def new_list_like(self, method: str) -> DataFrame | Series:
         result_dim = None
 
         for a in arg:
+            name = None
             try:
+                if isinstance(a, (tuple, list)):
+                    # Handle (name, value) pairs: item 0 is the output key
+                    name = a[0]
+                    a = a[1]
                 new_res = getattr(obj, method)(a)
                 if result_dim is None:
                     result_dim = getattr(new_res, "ndim", 0)
@@ -453,15 +458,15 @@ def new_list_like(self, method: str) -> DataFrame | Series:
                 results.append(new_res)
 
                 # make sure we find a good name
-                name = com.get_callable_name(a) or a
+                if name is None:
+                    name = com.get_callable_name(a) or a
                 keys.append(name)
 
         # if we are empty
         if not len(results):
             raise ValueError("no results")
 
         try:
-
             concatenated = concat(results, keys=keys, axis=1, sort=False)
         except TypeError:
             # we are concatting non-NDFrame objects,
@@ -567,6 +572,7 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
         -------
         Result of aggregation.
         """
+        from pandas import Index
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -586,16 +592,16 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
             # key only used for output
             colg = obj._gotitem(selection, ndim=1)
             results = {key: getattr(colg, method)(how) for key, how in arg.items()}
+
         else:
             # key used for column selection and output
-            results = [
-                # ndim = 2 for groupby; act like we always have multiple columns
-                getattr(obj._gotitem(key, ndim=2), method)(how)
+            results = {
+                key: getattr(obj._gotitem(key, ndim=1), method)(how)
                 for key, how in arg.items()
-            ]
+            }
             if self.renamer is not None:
-                for idx, columns in enumerate(self.renamer.values()):
-                    results[idx].columns = columns
+                for key, columns in self.renamer.items():
+                    results[key].columns = columns
 
         # Avoid making two isinstance calls in all and any below
         if isinstance(results, dict):
@@ -605,7 +611,15 @@ def new_dict_like(self, method: str) -> DataFrame | Series:
 
         # combine results
         if all(is_ndframe):
-            result = concat(results, axis=1)
+            keys_to_use = [k for k in arg.keys() if not results[k].empty]
+            keys_to_use = keys_to_use if keys_to_use != [] else arg.keys()
+            if selected_obj.ndim == 2:
+                # keys are columns, so we can preserve names
+                ktu = Index(keys_to_use)
+                ktu._set_names(selected_obj.columns.names)
+                keys_to_use = ktu
+            keys = None if selected_obj.ndim == 1 else keys_to_use
+            result = concat({k: results[k] for k in keys_to_use}, keys=keys, axis=1)
             if result.ndim == 1:
                 result = result.to_frame()
         elif any(is_ndframe):
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -888,8 +888,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         relabeling, func, columns, order, _ = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)
 
-        op = GroupByApply(self, func, args, kwargs)
-        result = op.agg()
+        with group_selection_context(self):
+            op = GroupByApply(self, func, args, kwargs)
+            result = op.agg()
         if not is_dict_like(func) and result is not None:
             return result
         elif relabeling and result is not None:
@@ -905,7 +906,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     # can't return early
                     result = self._aggregate_frame(func, *args, **kwargs)
 
-                elif self.axis == 1:
+                elif self.axis == 1 and self.grouper.nkeys == 1:
                     # _aggregate_multiple_funcs does not allow self.axis == 1
                     # Note: axis == 1 precludes 'not self.as_index', see __init__
                     result = self._aggregate_frame(func)
@@ -932,12 +933,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     return result
 
                 else:
-
                     # try to treat as if we are passing a list
-                    if get_option("new_udf_methods"):
-                        gba = GroupByApply(self, func, args=(), kwargs={})
-                    else:
-                        gba = GroupByApply(self, [func], args=(), kwargs={})
+                    gba = GroupByApply(self, [func], args=(), kwargs={})
                     try:
                         result = gba.agg()
                         if get_option("new_udf_methods") and result is None:
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1140,8 +1140,6 @@ def test_agg_multiple_mixed_no_warning():
     else:
         expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
     tm.assert_frame_equal(result, expected)
-    if get_option("mode.new_udf_methods"):
-        assert False
 
 
 def test_agg_reduce(axis, float_frame):
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -328,6 +328,10 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
     )
     # PerformanceWarning is thrown by `assert col in right` in assert_frame_equal
     with tm.assert_produces_warning(PerformanceWarning):
+        print("here!")
+        print(result.head())
+        print("---")
+        print(expected.head())
         tm.assert_frame_equal(result, expected)
 
 
@@ -500,12 +504,18 @@ def test_order_aggregate_multiple_funcs():
     # GH 25692
     df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
 
-    res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
-    result = res.columns.levels[1]
+    if get_option("new_udf_methods"):
+        # TODO (GH 35725): This will not raise when agg-must-agg is implemented
+        msg = "Cannot concat indices that do not have the same number of levels"
+        with pytest.raises(AssertionError, match=msg):
+            df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
+    else:
+        res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
+        result = res.columns.levels[1]
 
-    expected = Index(["sum", "max", "mean", "ohlc", "min"])
+        expected = Index(["sum", "max", "mean", "ohlc", "min"])
 
-    tm.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected)
 
 
 @pytest.mark.parametrize("dtype", [np.int64, np.uint64])
@@ -846,19 +856,11 @@ def test_groupby_aggregate_empty_key(kwargs):
     # GH: 32580
     df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]})
     result = df.groupby("a").agg(kwargs)
-    print(result)
-    if get_option("new_udf_methods"):
-        expected = DataFrame(
-            [1, 4],
-            index=Index([1, 2], dtype="int64", name="a"),
-            columns=MultiIndex.from_tuples([["min", "c"]]),
-        )
-    else:
-        expected = DataFrame(
-            [1, 4],
-            index=Index([1, 2], dtype="int64", name="a"),
-            columns=MultiIndex.from_tuples([["c", "min"]]),
-        )
+    expected = DataFrame(
+        [1, 4],
+        index=Index([1, 2], dtype="int64", name="a"),
+        columns=MultiIndex.from_tuples([["c", "min"]]),
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1219,7 +1221,10 @@ def test_nonagg_agg():
     g = df.groupby("a")
 
     result = g.agg(["cumsum"])
-    result.columns = result.columns.droplevel(-1)
+    if get_option("new_udf_methods"):
+        result.columns = result.columns.droplevel(0)
+    else:
+        result.columns = result.columns.droplevel(-1)
     expected = g.agg("cumsum")
 
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import get_option
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -201,13 +203,21 @@ def test_aggregate_api_consistency():
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg([np.sum, np.mean])
-    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
-    expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]])
+    else:
+        expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped[["D", "C"]].agg([np.sum, np.mean])
-    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
-    expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]])
+    else:
+        expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({"C": "mean", "D": "sum"})
@@ -393,7 +403,10 @@ def P1(a):
     g = df.groupby("date")
 
     expected = g.agg([P1])
-    expected.columns = expected.columns.levels[0]
+    if get_option("new_udf_methods"):
+        expected.columns = expected.columns.levels[1]
+    else:
+        expected.columns = expected.columns.levels[0]
 
     result = g.agg(P1)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1996,9 +1996,14 @@ def test_groupby_agg_ohlc_non_first():
         index=date_range("2018-01-01", periods=2, freq="D", name="dti"),
     )
 
-    result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
-
-    tm.assert_frame_equal(result, expected)
+    if get_option("new_udf_methods"):
+        # TODO (GH 35725): This will not raise when agg-must-agg is implemented
+        msg = "Cannot concat indices that do not have the same number of levels"
+        with pytest.raises(AssertionError, match=msg):
+            df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
+    else:
+        result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
+        tm.assert_frame_equal(result, expected)
 
 
 def test_groupby_multiindex_nat():
diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py
@@ -10,6 +10,7 @@
 from pandas import (
     DataFrame,
     Series,
+    get_option,
 )
 import pandas._testing as tm
 from pandas.core.indexes.datetimes import date_range
@@ -97,7 +98,10 @@ def test_resample_loffset_arg_type(frame, create_index, arg):
         result_agg = df.resample("2D", loffset="2H").agg(arg)
 
     if isinstance(arg, list):
-        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
+        if get_option("new_udf_methods"):
+            expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
+        else:
+            expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
 
     tm.assert_frame_equal(result_agg, expected)
 
@@ -216,7 +220,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg):
     with tm.assert_produces_warning(FutureWarning):
         result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
     if isinstance(agg_arg, list):
-        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
+        if get_option("new_udf_methods"):
+            expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
+        else:
+            expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
     tm.assert_frame_equal(result_agg, expected)
 
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -8,6 +8,7 @@
     DataFrame,
     NamedAgg,
     Series,
+    get_option,
 )
 import pandas._testing as tm
 from pandas.core.indexes.datetimes import date_range
@@ -347,15 +348,14 @@ def test_agg():
     b_std = r["B"].std()
     b_sum = r["B"].sum()
 
-    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
-    expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1)
+        expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]])
+    else:
+        expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
+        expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
     for t in cases:
-        warn = FutureWarning if t in cases[1:3] else None
-        with tm.assert_produces_warning(
-            warn, match="Dropping invalid columns", check_stacklevel=False
-        ):
-            # .var on dt64 column raises and is dropped
-            result = t.aggregate([np.mean, np.std])
+        result = t.aggregate([np.mean, np.std])
         tm.assert_frame_equal(result, expected)
 
     expected = pd.concat([a_mean, b_std], axis=1)
@@ -628,11 +628,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name):
         columns=[col_name],
     )
     result = df.resample("1d").aggregate(["mean"])
-    expected = DataFrame(
-        [47.5, 143.5, 195.5],
-        index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
-        columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
-    )
+    if get_option("new_udf_methods"):
+        expected = DataFrame(
+            [47.5, 143.5, 195.5],
+            index=date_range(
+                start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
+            ),
+            columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]),
+        )
+    else:
+        expected = DataFrame(
+            [47.5, 143.5, 195.5],
+            index=date_range(
+                start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
+            ),
+            columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
+        )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import get_option
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -1905,8 +1907,14 @@ def test_pivot_margins_name_unicode(self):
             frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek
         )
         index = Index([1, 2, 3, greek], dtype="object", name="foo")
-        expected = DataFrame(index=index)
-        tm.assert_frame_equal(table, expected)
+
+        if get_option("new_udf_methods"):
+            expected = Series([1, 1, 1, 3], index=index)
+            expected.index.name = None
+            tm.assert_series_equal(table, expected)
+        else:
+            expected = DataFrame(index=index)
+            tm.assert_frame_equal(table, expected)
 
     def test_pivot_string_as_func(self):
         # GH #18713
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py