Skip to content

Commit ae396ff

Browse files
committed
WIP: fast transform on DataFrame
1 parent 0eb4a82 commit ae396ff

File tree

2 files changed

+62
-21
lines changed

2 files changed

+62
-21
lines changed

doc/source/v0.14.1.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Performance
137137
~~~~~~~~~~~
138138
- Improvements in dtype inference for numeric operations, yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`)
139139
- Improvements in Series.transform for significant performance gains (:issue:`6496`)
140+
- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`)
140141

141142

142143

pandas/core/groupby.py

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2701,27 +2701,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
27012701
return self._concat_objects(keys, values,
27022702
not_indexed_same=not_indexed_same)
27032703

2704-
def transform(self, func, *args, **kwargs):
2705-
"""
2706-
Call function producing a like-indexed DataFrame on each group and
2707-
return a DataFrame having the same indexes as the original object
2708-
filled with the transformed values
2709-
2710-
Parameters
2711-
----------
2712-
f : function
2713-
Function to apply to each subframe
2714-
2715-
Notes
2716-
-----
2717-
Each subframe is endowed the attribute 'name' in case you need to know
2718-
which group you are working on.
2719-
2720-
Examples
2721-
--------
2722-
>>> grouped = df.groupby(lambda x: mapping[x])
2723-
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
2724-
"""
2704+
def _transform_general(self, func, *args, **kwargs):
27252705
from pandas.tools.merge import concat
27262706

27272707
applied = []
@@ -2763,6 +2743,66 @@ def transform(self, func, *args, **kwargs):
27632743
concatenated.sort_index(inplace=True)
27642744
return concatenated
27652745

2746+
def transform(self, func, *args, **kwargs):
    """
    Call function producing a like-indexed DataFrame on each group and
    return a DataFrame having the same indexes as the original object
    filled with the transformed values

    Parameters
    ----------
    func : function or string
        Function to apply to each subframe. A string is looked up as an
        attribute of this groupby object and called with ``*args`` and
        ``**kwargs``.
    *args, **kwargs
        Extra arguments forwarded to ``func``.

    Returns
    -------
    DataFrame
        Either the broadcast result of the fast path or whatever
        ``_transform_general`` returns when the fast path does not apply.

    Notes
    -----
    Each subframe is endowed the attribute 'name' in case you need to know
    which group you are working on.

    A fast path is attempted first: ``func`` is evaluated once through a
    grouper method and the per-group result is broadcast back onto the
    original index. Whenever that is not possible the call falls back to
    ``_transform_general``.

    Examples
    --------
    >>> grouped = df.groupby(lambda x: mapping[x])
    >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
    """

    # try to do a fast transform via merge if possible
    result = None
    try:
        obj = self._obj_with_exclusions
        if isinstance(func, compat.string_types):
            result = getattr(self, func)(*args, **kwargs)
        else:
            # _intercept_cython yields the name of a grouper method to
            # call, or a falsy value when ``func`` has no such equivalent
            cyfunc = _intercept_cython(func)
            if cyfunc and not args and not kwargs:
                result = getattr(self, cyfunc)()
    except Exception:
        # Only the fast-path attempt is guarded. The general path is
        # invoked outside this handler so that a failure there propagates
        # once instead of being swallowed and re-run; KeyboardInterrupt
        # and SystemExit are no longer intercepted either.
        result = None

    # no fast path available, fast path failed, or a reduction transform
    # that did not produce a DataFrame
    if not isinstance(result, DataFrame):
        return self._transform_general(func, *args, **kwargs)

    # nuisance columns
    if not result.columns.equals(obj.columns):
        return self._transform_general(func, *args, **kwargs)

    # a grouped that doesn't preserve the index, remap index based on the
    # grouper and broadcast it
    if (not isinstance(obj.index, MultiIndex) and
            type(result.index) is not type(obj.index)):
        results = obj.values.copy()
        for (name, _), (_, row) in zip(self, result.iterrows()):
            indexer = self._get_index(name)
            # repeat the group's single result row once per member row
            results[indexer] = np.tile(
                row.values, len(indexer)).reshape(len(indexer), -1)
        return DataFrame(results, columns=result.columns,
                         index=obj.index).convert_objects()

    # we can merge the result in
    # GH 7383
    names = result.columns
    merged = obj.merge(result, how='outer',
                       left_index=True, right_index=True)
    # the trailing columns of the merge are the ones that came from
    # ``result``; keep only those and restore their original labels
    result = merged.ix[:, -result.shape[1]:]
    result.columns = names
    return result
2805+
27662806
def _define_paths(self, func, *args, **kwargs):
27672807
if isinstance(func, compat.string_types):
27682808
fast_path = lambda group: getattr(group, func)(*args, **kwargs)

0 commit comments

Comments
 (0)