Skip to content

Commit 25efd37

Browse files
committed
Removed vectorization from groupby pct change
1 parent 41beb4a commit 25efd37

File tree

2 files changed

+15
-33
lines changed

2 files changed

+15
-33
lines changed

pandas/core/groupby/groupby.py

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2070,17 +2070,10 @@ def shift(self, periods=1, freq=None, axis=0):
20702070
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
20712071
axis=0):
20722072
"""Calculate pct_change of each value to previous entry in group"""
2073-
if (freq is not None or axis != 0) or not self.grouper.is_monotonic:
2074-
return self.apply(lambda x: x.pct_change(periods=periods,
2075-
fill_method=fill_method,
2076-
limit=limit, freq=freq,
2077-
axis=axis))
2078-
2079-
filled = getattr(self, fill_method)(limit=limit).drop(
2080-
self.grouper.names, axis=1)
2081-
shifted = filled.shift(periods=periods, freq=freq)
2082-
2083-
return (filled / shifted) - 1
2073+
return self.apply(lambda x: x.pct_change(periods=periods,
2074+
fill_method=fill_method,
2075+
limit=limit, freq=freq,
2076+
axis=axis))
20842077

20852078
@Substitution(name='groupby')
20862079
@Appender(_doc_template)
@@ -3943,15 +3936,9 @@ def _apply_to_column_groupbys(self, func):
39433936

39443937
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
39453938
"""Calculate pct_change of each value to previous entry in group"""
3946-
if not self.grouper.is_monotonic:
3947-
return self.apply(lambda x: x.pct_change(periods=periods,
3948-
fill_method=fill_method,
3949-
limit=limit, freq=freq))
3950-
3951-
filled = getattr(self, fill_method)(limit=limit)
3952-
shifted = filled.shift(periods=periods, freq=freq)
3953-
3954-
return (filled / shifted) - 1
3939+
return self.apply(lambda x: x.pct_change(periods=periods,
3940+
fill_method=fill_method,
3941+
limit=limit, freq=freq))
39553942

39563943

39573944
class NDFrameGroupBy(GroupBy):

pandas/tests/groupby/test_transform.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -729,15 +729,13 @@ def interweave(list_obj):
729729
(-1, 'ffill', None), (-1, 'ffill', 1),
730730
(-1, 'bfill', None), (-1, 'bfill', 1)])
731731
def test_pct_change(test_series, shuffle, periods, fill_method, limit):
732-
# Groupby pct change uses an apply if non-monotonic
733-
# and a vectorized operation if monotonic
734-
# Shuffle parameter tests each
735-
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
732+
vals = [3, np.nan, 1, 2, 4, 10, np.nan, np.nan]
736733
keys = ['a', 'b']
737734
key_v = [k for j in list(map(lambda x: [x] * len(vals), keys)) for k in j]
738735
df = DataFrame({'key': key_v, 'vals': vals * 2})
739736
if shuffle:
740-
df = df.reindex(np.random.permutation(len(df))).reset_index(drop=True)
737+
order = np.random.RandomState(seed=42).permutation(len(df))
738+
df = df.reindex(order).reset_index(drop=True)
741739

742740
manual_apply = []
743741
for k in keys:
@@ -746,30 +744,27 @@ def test_pct_change(test_series, shuffle, periods, fill_method, limit):
746744
fill_method=fill_method,
747745
limit=limit))
748746
exp_vals = pd.concat(manual_apply).reset_index(drop=True)
749-
exp = pd.DataFrame(exp_vals, columns=['_pct_change'])
747+
exp = pd.DataFrame(exp_vals, columns=['A'])
750748
grp = df.groupby('key')
751749

752750
def get_result(grp_obj):
753751
return grp_obj.pct_change(periods=periods,
754752
fill_method=fill_method,
755753
limit=limit)
756754

757-
# Specifically test when monotonic and not monotonic
758-
759755
if test_series:
760-
exp = exp.loc[:, '_pct_change']
756+
exp = exp.loc[:, 'A']
761757
grp = grp['vals']
762758
result = get_result(grp)
763-
# Resort order by keys to compare to expected values
764-
df.insert(0, '_pct_change', result)
759+
df.insert(0, 'A', result)
765760
result = df.sort_values(by='key')
766-
result = result.loc[:, '_pct_change']
761+
result = result.loc[:, 'A']
767762
result = result.reset_index(drop=True)
768763
tm.assert_series_equal(result, exp)
769764
else:
770765
result = get_result(grp)
771766
result.reset_index(drop=True, inplace=True)
772-
result.columns = ['_pct_change']
767+
result.columns = ['A']
773768
tm.assert_frame_equal(result, exp)
774769

775770

0 commit comments

Comments
 (0)