pandas-dev · jreback · Sep 12, 2020 · Aug 18, 2020 · Sep 5, 2020 · Sep 5, 2020
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -113,7 +113,7 @@
 )
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
-from pandas.core import algorithms, common as com, nanops, ops
+from pandas.core import algorithms, base, common as com, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.aggregation import reconstruct_func, relabel_result
 from pandas.core.arrays import Categorical, ExtensionArray
@@ -7440,7 +7440,20 @@ def transform(self, func, axis=0, *args, **kwargs) -> "DataFrame":
         axis = self._get_axis_number(axis)
         if axis == 1:
             return self.T.transform(func, *args, **kwargs).T
-        return super().transform(func, *args, **kwargs)
+
+        if isinstance(func, list):
+            func = {col: func for col in self}
+        elif isinstance(func, dict):
+            cols = sorted(set(func.keys()) - set(self.columns))
+            if len(cols) > 0:
+                raise base.SpecificationError(f"Column(s) {cols} do not exist")
+            if any(isinstance(v, dict) for v in func.values()):
+                # GH 15931 - deprecation of renaming keys
+                raise base.SpecificationError("nested renamer is not supported")
+
+        result = self._transform(func, *args, **kwargs)
+
+        return result
 
     def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds):
         """

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10750,9 +10750,48 @@ def transform(self, func, *args, **kwargs):
         1  1.000000   2.718282
         2  1.414214   7.389056
         """
-        result = self.agg(func, *args, **kwargs)
-        if is_scalar(result) or len(result) != len(self):
-            raise ValueError("transforms cannot produce aggregated results")
+        raise NotImplementedError
+
+    def _transform(self, func, *args, **kwargs):
+        if isinstance(func, dict):
+            results = {}
+            for name, how in func.items():
+                colg = self._gotitem(name, ndim=1)
+                try:
+                    results[name] = colg.transform(how, *args, **kwargs)
+                except Exception as e:
+                    if str(e) == "Function did not transform":
+                        raise e
+
+            # combine results
+            if len(results) == 0:
+                raise ValueError("Transform function failed")
+            from pandas.core.reshape.concat import concat
+
+            return concat(results, axis=1)
+
+        try:
+            if isinstance(func, str):
+                result = self._try_aggregate_string_function(func, *args, **kwargs)
+            else:
+                f = self._get_cython_func(func)
+                if f and not args and not kwargs:
+                    result = getattr(self, f)()
+                else:
+                    try:
+                        result = self.apply(func, args=args, **kwargs)
+                    except Exception:
+                        result = func(self, *args, **kwargs)
+
+        except Exception:
+            raise ValueError("Transform function failed")
+
+        # Functions that transform may return empty Series/DataFrame
+        # when the dtype is not appropriate
+        if isinstance(result, NDFrame) and result.empty:
+            raise ValueError("Transform function failed")
+        if not isinstance(result, NDFrame) or not result.index.equals(self.index):
+            raise ValueError("Function did not transform")
 
         return result
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4083,7 +4083,15 @@ def aggregate(self, func=None, axis=0, *args, **kwargs):
     def transform(self, func, axis=0, *args, **kwargs):
         # Validate the axis parameter
         self._get_axis_number(axis)
-        return super().transform(func, *args, **kwargs)
+
+        if isinstance(func, list):
+            func = {com.get_callable_name(v) or v: v for v in func}
+        elif isinstance(func, dict):
+            if any(isinstance(v, dict) for v in func.values()):
+                raise base.SpecificationError("nested renamer is not supported")
+
+        result = self._transform(func, *args, **kwargs)
+        return result
 
     def apply(self, func, convert_dtype=True, args=(), **kwds):
         """

diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py
@@ -2,6 +2,7 @@
 from datetime import datetime
 from itertools import chain
 import operator
+import re
 import warnings
 
 import numpy as np
@@ -14,6 +15,7 @@
 import pandas._testing as tm
 from pandas.core.apply import frame_apply
 from pandas.core.base import SpecificationError
+from pandas.core.groupby.base import transformation_kernels
 
 
 @pytest.fixture
@@ -1131,9 +1133,29 @@ def test_agg_transform(self, axis, float_frame):
             result = float_frame.transform([np.abs, "sqrt"], axis=axis)
             tm.assert_frame_equal(result, expected)
 
+            # UDF via apply
+            def func(x):
+                if isinstance(x, DataFrame):
+                    raise ValueError
+                return x + 1
+
+            result = float_frame.transform(func, axis=axis)
+            expected = float_frame + 1
+            tm.assert_frame_equal(result, expected)
+
+            # UDF that maps DataFrame -> DataFrame
+            def func(x):
+                if not isinstance(x, DataFrame):
+                    raise ValueError
+                return x + 1
+
+            result = float_frame.transform(func, axis=axis)
+            expected = float_frame + 1
+            tm.assert_frame_equal(result, expected)
+
     def test_transform_and_agg_err(self, axis, float_frame):
         # cannot both transform and agg
-        msg = "transforms cannot produce aggregated results"
+        msg = "Function did not transform"
         with pytest.raises(ValueError, match=msg):
             float_frame.transform(["max", "min"], axis=axis)
 
@@ -1142,6 +1164,7 @@ def test_transform_and_agg_err(self, axis, float_frame):
             with np.errstate(all="ignore"):
                 float_frame.agg(["max", "sqrt"], axis=axis)
 
+        msg = "Function did not transform"
         with pytest.raises(ValueError, match=msg):
             with np.errstate(all="ignore"):
                 float_frame.transform(["max", "sqrt"], axis=axis)
@@ -1221,6 +1244,9 @@ def test_agg_dict_nested_renaming_depr(self):
         with pytest.raises(SpecificationError, match=msg):
             df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}})
 
+        with pytest.raises(SpecificationError, match=msg):
+            df.transform({"A": {"foo": "min"}, "B": {"bar": "max"}})
+
     def test_agg_reduce(self, axis, float_frame):
         other_axis = 1 if axis in {0, "index"} else 0
         name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
@@ -1550,3 +1576,88 @@ def test_apply_empty_list_reduce():
     result = df.apply(lambda x: [], result_type="reduce")
     expected = pd.Series({"a": [], "b": []}, dtype=object)
     tm.assert_series_equal(result, expected)
+
+
+def test_transform_reducer_raises(all_reductions):
+    op = all_reductions
+    s = pd.DataFrame({"A": [1, 2, 3]})
+    msg = "Function did not transform"
+    with pytest.raises(ValueError, match=msg):
+        s.transform(op)
+    with pytest.raises(ValueError, match=msg):
+        s.transform([op])
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": op})
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": [op]})
+
+
+# mypy doesn't allow adding lists of different types
+# https://github.com/python/mypy/issues/5492
+@pytest.mark.parametrize("op", [*transformation_kernels, lambda x: x + 1])
+def test_transform_bad_dtype(op):
+    s = pd.DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms
+    if op in ("backfill", "shift", "pad", "bfill", "ffill"):
+        pytest.xfail("Transform function works on any datatype")
+    msg = "Transform function failed"
+    with pytest.raises(ValueError, match=msg):
+        s.transform(op)
+    with pytest.raises(ValueError, match=msg):
+        s.transform([op])
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": op})
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": [op]})
+
+
+@pytest.mark.parametrize("op", transformation_kernels)
+def test_transform_multi_dtypes(op):
+    df = pd.DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]})
+
+    # Determine which columns op will work on
+    columns = []
+    for column in df:
+        try:
+            df[column].transform(op)
+            columns.append(column)
+        except Exception:
+            pass
+
+    if len(columns) > 0:
+        expected = df[columns].transform([op])
+        result = df.transform([op])
+        tm.assert_equal(result, expected)
+
+        expected = df[columns].transform({column: op for column in columns})
+        result = df.transform({column: op for column in columns})
+        tm.assert_equal(result, expected)
+
+        expected = df[columns].transform({column: [op] for column in columns})
+        result = df.transform({column: [op] for column in columns})
+        tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize("use_apply", [True, False])
+def test_transform_passes_args(use_apply):
+    # transform uses UDF either via apply or passing the entire DataFrame
+    expected_args = [1, 2]
+    expected_kwargs = {"c": 3}
+
+    def f(x, a, b, c):
+        # transform is using apply iff x is not a DataFrame
+        if use_apply == isinstance(x, DataFrame):
+            # Force transform to fall back
+            raise ValueError
+        assert [a, b] == expected_args
+        assert c == expected_kwargs["c"]
+        return x
+
+    pd.DataFrame([1]).transform(f, 0, *expected_args, **expected_kwargs)
+
+
+@pytest.mark.parametrize("axis", [0, "index", 1, "columns"])
+def test_transform_missing_columns(axis):
+    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+    match = re.escape("Column(s) ['C'] do not exist")
+    with pytest.raises(SpecificationError, match=match):
+        df.transform({"C": "cumsum"})
diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py
@@ -8,6 +8,7 @@
 from pandas import DataFrame, Index, MultiIndex, Series, isna
 import pandas._testing as tm
 from pandas.core.base import SpecificationError
+from pandas.core.groupby.base import transformation_kernels
 
 
 class TestSeriesApply:
@@ -222,7 +223,7 @@ def test_transform(self, string_series):
             expected.columns = ["sqrt"]
             tm.assert_frame_equal(result, expected)
 
-            result = string_series.transform([np.sqrt])
+            result = string_series.apply([np.sqrt])
             tm.assert_frame_equal(result, expected)
 
             result = string_series.transform(["sqrt"])
@@ -248,9 +249,34 @@ def test_transform(self, string_series):
             result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
             tm.assert_series_equal(result.reindex_like(expected), expected)
 
+            expected = pd.concat([f_sqrt, f_abs], axis=1)
+            expected.columns = ["foo", "bar"]
+            result = string_series.transform({"foo": np.sqrt, "bar": np.abs})
+            tm.assert_frame_equal(result, expected)
+
+            # UDF via apply
+            def func(x):
+                if isinstance(x, Series):
+                    raise ValueError
+                return x + 1
+
+            result = string_series.transform(func)
+            expected = string_series + 1
+            tm.assert_series_equal(result, expected)
+
+            # UDF that maps Series -> Series
+            def func(x):
+                if not isinstance(x, Series):
+                    raise ValueError
+                return x + 1
+
+            result = string_series.transform(func)
+            expected = string_series + 1
+            tm.assert_series_equal(result, expected)
+
     def test_transform_and_agg_error(self, string_series):
         # we are trying to transform with an aggregator
-        msg = "transforms cannot produce aggregated results"
+        msg = "Function did not transform"
         with pytest.raises(ValueError, match=msg):
             string_series.transform(["min", "max"])
 
@@ -259,6 +285,7 @@ def test_transform_and_agg_error(self, string_series):
             with np.errstate(all="ignore"):
                 string_series.agg(["sqrt", "max"])
 
+        msg = "Function did not transform"
         with pytest.raises(ValueError, match=msg):
             with np.errstate(all="ignore"):
                 string_series.transform(["sqrt", "max"])
@@ -467,11 +494,73 @@ def test_transform_none_to_type(self):
         # GH34377
         df = pd.DataFrame({"a": [None]})
 
-        msg = "DataFrame constructor called with incompatible data and dtype"
-        with pytest.raises(TypeError, match=msg):
+        msg = "Transform function failed.*"
+        with pytest.raises(ValueError, match=msg):
             df.transform({"a": int})
 
 
+def test_transform_reducer_raises(all_reductions):
+    op = all_reductions
+    s = pd.Series([1, 2, 3])
+    msg = "Function did not transform"
+    with pytest.raises(ValueError, match=msg):
+        s.transform(op)
+    with pytest.raises(ValueError, match=msg):
+        s.transform([op])
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": op})
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": [op]})
+
+
+# mypy doesn't allow adding lists of different types
+# https://github.com/python/mypy/issues/5492
+@pytest.mark.parametrize("op", [*transformation_kernels, lambda x: x + 1])
+def test_transform_bad_dtype(op):
+    s = pd.Series(3 * [object])  # Series that will fail on most transforms
+    if op in ("backfill", "shift", "pad", "bfill", "ffill"):
+        pytest.xfail("Transform function works on any datatype")
+    msg = "Transform function failed"
+    with pytest.raises(ValueError, match=msg):
+        s.transform(op)
+    with pytest.raises(ValueError, match=msg):
+        s.transform([op])
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": op})
+    with pytest.raises(ValueError, match=msg):
+        s.transform({"A": [op]})
+
+
+@pytest.mark.parametrize("use_apply", [True, False])
+def test_transform_passes_args(use_apply):
+    # transform uses UDF either via apply or passing the entire Series
+    expected_args = [1, 2]
+    expected_kwargs = {"c": 3}
+
+    def f(x, a, b, c):
+        # transform is using apply iff x is not a Series
+        if use_apply == isinstance(x, Series):
+            # Force transform to fall back
+            raise ValueError
+        assert [a, b] == expected_args
+        assert c == expected_kwargs["c"]
+        return x
+
+    pd.Series([1]).transform(f, 0, *expected_args, **expected_kwargs)
+
+
+def test_transform_axis_1_raises():
+    msg = "No axis named 1 for object type Series"
+    with pytest.raises(ValueError, match=msg):
+        pd.Series([1]).transform("sum", axis=1)
+
+
+def test_transform_nested_renamer():
+    match = "nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=match):
+        pd.Series([1]).transform({"A": {"B": ["sum"]}})
+
+
 class TestSeriesMap:
     def test_map(self, datetime_series):
         index, data = tm.getMixedTypeDict()