Closed
Description
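This is probably an edge case, but it bit me: `DataFrame.replace` becomes dramatically slower once the `use_inf_as_null` option is enabled, while an equivalent hand-written column loop is unaffected.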
import pandas, numpy as np
# Baseline (default options): time the built-in DataFrame.replace on a 10000-row frame.
df = pandas.DataFrame({'a':[0]*5000+[1]*5000, 'b':[2]*5000+[1]*5000 , 'c': ['a']*5000 + ['b']*5000})
%timeit df.replace(1,3, inplace=True)
# Rebuild the frame so the hand-written loop below starts from the same data.
df = pandas.DataFrame({'a':[0]*5000+[1]*5000, 'b':[2]*5000+[1]*5000 , 'c': ['a']*5000 + ['b']*5000})
def rep(df):
    """Replace every value equal to 1 with 3, one column at a time.

    Hand-written counterpart to ``df.replace(1, 3, inplace=True)``, used as
    the comparison point in the timings.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It is mutated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for c in df.columns:
        # Boolean mask selects the rows of this column that equal 1.
        df.loc[df[c] == 1, c] = 3
    return df
# Time the hand-written loop under default options.
%timeit rep(df)
# Switch on use_inf_as_null, then repeat both measurements with the option enabled.
pandas.set_option('use_inf_as_null', True)
df = pandas.DataFrame({'a':[0]*5000+[1]*5000, 'b':[2]*5000+[1]*5000, 'c': ['a']*5000 + ['b']*5000})
# Same DataFrame.replace call as in the baseline; only the option differs.
%timeit df.replace(1,3, inplace=True)
# Rebuild the frame again before re-timing the hand-written loop.
df = pandas.DataFrame({'a':[0]*5000+[1]*5000, 'b':[2]*5000+[1]*5000 , 'c': ['a']*5000 + ['b']*5000})
def rep(df):
    """Replace every value equal to 1 with 3, one column at a time.

    Hand-written counterpart to ``df.replace(1, 3, inplace=True)``, used as
    the comparison point in the timings.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It is mutated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for c in df.columns:
        # Boolean mask selects the rows of this column that equal 1.
        df.loc[df[c] == 1, c] = 3
    return df
# Time the hand-written loop again, this time with use_inf_as_null enabled.
%timeit rep(df)
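One of these things is not like the other! Timings, in the order of the four `%timeit` calls above (the third is `df.replace` with `use_inf_as_null` enabled):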
1000 loops, best of 3: 1.77 ms per loop
100 loops, best of 3: 5.89 ms per loop
1 loop, best of 3: 2.24 s per loop
100 loops, best of 3: 5.92 ms per loop
Observed with pandas 0.20.1.