Autopep8 #2632

Merged: 3 commits, Jan 5, 2013
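The diffs below are evidently the output of running autopep8 over the bench/ scripts. As a rough sketch of how a pass like this can be reproduced (the --in-place flag is part of the standard autopep8 command line; autopep8.fix_code is the Python-level entry point in current releases, shown here as an illustration rather than the exact invocation used for this PR):

    # Illustrative sketch, not the exact commands from this PR:
    #     autopep8 --in-place bench/bench_groupby.py
    # The same fixer is callable from Python in current autopep8 releases:
    import autopep8

    source = "d = { 'a' : 1 ,'b':2 }\n"
    print(autopep8.fix_code(source))  # -> d = {'a': 1, 'b': 2}
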
1 change: 0 additions & 1 deletion bench/bench_dense_to_sparse.py
@@ -12,4 +12,3 @@
this_rng = rng2[:-i]
data[100:] = np.nan
series[i] = SparseSeries(data, index=this_rng)

7 changes: 7 additions & 0 deletions bench/bench_get_put_value.py
@@ -4,39 +4,46 @@
N = 1000
K = 50


def _random_index(howmany):
return Index([rands(10) for _ in xrange(howmany)])

df = DataFrame(np.random.randn(N, K), index=_random_index(N),
columns=_random_index(K))


def get1():
for col in df.columns:
for row in df.index:
_ = df[col][row]


def get2():
for col in df.columns:
for row in df.index:
_ = df.get_value(row, col)


def put1():
for col in df.columns:
for row in df.index:
df[col][row] = 0


def put2():
for col in df.columns:
for row in df.index:
df.set_value(row, col, 0)


def resize1():
buf = DataFrame()
for col in df.columns:
for row in df.index:
buf = buf.set_value(row, col, 5.)
return buf


def resize2():
from collections import defaultdict

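Almost everything autopep8 added in this file is vertical whitespace: PEP 8 (pycodestyle code E302) asks for two blank lines before each top-level definition. A minimal sketch of that one fix in isolation (assumes a current autopep8 release; not code from this PR):

    import autopep8

    crowded = "def get1():\n    pass\ndef get2():\n    pass\n"
    print(autopep8.fix_code(crowded))
    # def get1():
    #     pass
    #
    #
    # def get2():
    #     pass
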
9 changes: 6 additions & 3 deletions bench/bench_groupby.py
@@ -12,16 +12,19 @@
random.shuffle(foo)
random.shuffle(foo2)

df = DataFrame({'A' : foo,
'B' : foo2,
'C' : np.random.randn(n * k)})
df = DataFrame({'A': foo,
'B': foo2,
'C': np.random.randn(n * k)})

import pandas._sandbox as sbx


def f():
table = sbx.StringHashTable(len(df))
ret = table.factorize(df['A'])
return ret


def g():
table = sbx.PyObjectHashTable(len(df))
ret = table.factorize(df['A'])
44 changes: 26 additions & 18 deletions bench/bench_join_panel.py
@@ -1,49 +1,55 @@
# reasonably effecient
# reasonably efficient


def create_panels_append(cls, panels):
""" return an append list of panels """
panels = [ a for a in panels if a is not None ]
panels = [a for a in panels if a is not None]
# corner cases
if len(panels) == 0:
return None
elif len(panels) == 1:
return panels[0]
elif len(panels) == 2 and panels[0] == panels[1]:
return panels[0]
#import pdb; pdb.set_trace()
# import pdb; pdb.set_trace()
# create a joint index for the axis

def joint_index_for_axis(panels, axis):
s = set()
for p in panels:
s.update(list(getattr(p,axis)))
s.update(list(getattr(p, axis)))
return sorted(list(s))

def reindex_on_axis(panels, axis, axis_reindex):
new_axis = joint_index_for_axis(panels, axis)
new_panels = [ p.reindex(**{ axis_reindex : new_axis, 'copy' : False}) for p in panels ]
new_panels = [p.reindex(**{axis_reindex: new_axis,
'copy': False}) for p in panels]
return new_panels, new_axis
# create the joint major index, dont' reindex the sub-panels - we are appending
# create the joint major index, dont' reindex the sub-panels - we are
# appending
major = joint_index_for_axis(panels, 'major_axis')
# reindex on minor axis
panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
# reindex on items
panels, items = reindex_on_axis(panels, 'items', 'items')
# concatenate values
try:
values = np.concatenate([ p.values for p in panels ],axis=1)
values = np.concatenate([p.values for p in panels], axis=1)
except (Exception), detail:
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail)))
#pm('append - create_panel')
p = Panel(values, items = items, major_axis = major, minor_axis = minor )
#pm('append - done')
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
% (','.join(["%s" % p for p in panels]), str(detail)))
# pm('append - create_panel')
p = Panel(values, items=items, major_axis=major,
minor_axis=minor)
# pm('append - done')
return p



# does the job but inefficient (better to handle like you read a table in pytables...e.g create a LongPanel then convert to Wide)

# does the job but inefficient (better to handle like you read a table in
# pytables...e.g create a LongPanel then convert to Wide)
def create_panels_join(cls, panels):
""" given an array of panels's, create a single panel """
panels = [ a for a in panels if a is not None ]
panels = [a for a in panels if a is not None]
# corner cases
if len(panels) == 0:
return None
@@ -62,16 +68,18 @@ def create_panels_join(cls, panels):
for minor_i, minor_index in panel.minor_axis.indexMap.items():
for major_i, major_index in panel.major_axis.indexMap.items():
try:
d[(minor_i,major_i,item)] = values[item_index,major_index,minor_index]
d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index]
except:
pass
# stack the values
minor = sorted(list(minor))
major = sorted(list(major))
items = sorted(list(items))
# create the 3d stack (items x columns x indicies)
data = np.dstack([ np.asarray([ np.asarray([ d.get((minor_i,major_i,item),np.nan) for item in items ]) for major_i in major ]).transpose() for minor_i in minor ])
data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan)
for item in items])
for major_i in major]).transpose()
for minor_i in minor])
# construct the panel
return Panel(data, items, major, minor)
add_class_method(Panel, create_panels_join, 'join_many')
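Note that the diff above reflows the long raise statement but leaves the surrounding except (Exception), detail: clause untouched: that comma spelling is Python 2 only syntax, a compatibility question rather than a formatting one. For reference, a minimal sketch of the version-agnostic form (illustrative only, not part of this PR):

    # "except SomeError, name:" parses only under Python 2; the "as" form
    # below works on Python 2.6+ and Python 3 alike.
    try:
        raise ValueError("dimensions do not match")
    except Exception as detail:
        print("cannot append values: %s" % detail)
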

11 changes: 10 additions & 1 deletion bench/bench_khash_dict.py
@@ -16,12 +16,15 @@
pid = os.getpid()
proc = psutil.Process(pid)


def object_test_data(n):
pass


def string_test_data(n):
return np.array([rands(10) for _ in xrange(n)], dtype='O')


def int_test_data(n):
return np.arange(n, dtype='i8')

@@ -30,17 +33,21 @@ def int_test_data(n):
#----------------------------------------------------------------------
# Benchmark 1: map_locations


def map_locations_python_object():
arr = string_test_data(N)
return _timeit(lambda: lib.map_indices_object(arr))


def map_locations_khash_object():
arr = string_test_data(N)

def f():
table = sbx.PyObjectHashTable(len(arr))
table.map_locations(arr)
return _timeit(f)


def _timeit(f, iterations=10):
start = time.time()
for _ in xrange(iterations):
@@ -51,17 +58,20 @@ def _timeit(f, iterations=10):
#----------------------------------------------------------------------
# Benchmark 2: lookup_locations


def lookup_python(values):
table = lib.map_indices_object(values)
return _timeit(lambda: lib.merge_indexer_object(values, table))


def lookup_khash(values):
table = sbx.PyObjectHashTable(len(values))
table.map_locations(values)
locs = table.lookup_locations(values)
# elapsed = _timeit(lambda: table.lookup_locations2(values))
return table


def leak(values):
for _ in xrange(100):
print proc.get_memory_info()
@@ -75,4 +85,3 @@ def leak(values):

#----------------------------------------------------------------------
# Benchmark 4: factorize

15 changes: 9 additions & 6 deletions bench/bench_merge.py
@@ -5,6 +5,7 @@
N = 10000
ngroups = 10


def get_test_data(ngroups=100, n=N):
unique_groups = range(ngroups)
arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)
@@ -38,10 +39,10 @@ def get_test_data(ngroups=100, n=N):
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

left = DataFrame({'key' : key, 'key2':key2,
'value' : np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
'value2' : np.random.randn(8000)})
left = DataFrame({'key': key, 'key2': key2,
'value': np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
'value2': np.random.randn(8000)})

right2 = right.append(right, ignore_index=True)

@@ -78,7 +79,8 @@

all_results = all_results.div(all_results['pandas'], axis=0)

all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
'base::merge']]

sort_results = DataFrame.from_items([('pandas', results['sort']),
('R', r_results['base::merge'])])
@@ -102,4 +104,5 @@ def get_test_data(ngroups=100, n=N):

all_results = presults.join(r_results)
all_results = all_results.div(all_results['pandas'], axis=0)
all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
'base::merge']]
22 changes: 12 additions & 10 deletions bench/bench_merge_sqlite.py
@@ -13,10 +13,10 @@
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

left = DataFrame({'key' : key, 'key2':key2,
'value' : np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
'value2' : np.random.randn(8000)})
left = DataFrame({'key': key, 'key2': key2,
'value': np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
'value2': np.random.randn(8000)})

# right2 = right.append(right, ignore_index=True)
# right = right2
@@ -30,8 +30,10 @@
create_sql_indexes = True

conn = sqlite3.connect(':memory:')
conn.execute('create table left( key varchar(10), key2 varchar(10), value int);')
conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);')
conn.execute(
'create table left( key varchar(10), key2 varchar(10), value int);')
conn.execute(
'create table right( key varchar(10), key2 varchar(10), value2 int);')
conn.executemany('insert into left values (?, ?, ?)',
zip(key, key2, left['value']))
conn.executemany('insert into right values (?, ?, ?)',
@@ -43,7 +45,7 @@
conn.execute('create index right_ix on right(key, key2)')


join_methods = ['inner', 'left outer', 'left'] # others not supported
join_methods = ['inner', 'left outer', 'left'] # others not supported
sql_results = DataFrame(index=join_methods, columns=[False])
niter = 5
for sort in [False]:
@@ -61,8 +63,8 @@

if sort:
sql = '%s order by key, key2' % sql
f = lambda: list(conn.execute(sql)) # list fetches results
g = lambda: conn.execute(sql) # list fetches results
f = lambda: list(conn.execute(sql)) # list fetches results
g = lambda: conn.execute(sql) # list fetches results
gc.disable()
start = time.time()
# for _ in xrange(niter):
@@ -74,7 +76,7 @@
conn.commit()

sql_results[sort][join_method] = elapsed
sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
sql_results.index = ['inner', 'outer', 'left']

sql = """select *
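In the f = lambda / g = lambda hunk above, only the comment spacing changed: pycodestyle E261 requires at least two spaces before an inline comment, and that is the whole edit. The lambda assignments themselves were left as-is, even though current pycodestyle flags assigning a lambda to a name as E731; the usual manual rewrite is a def, sketched below (illustrative, not part of this PR):

    # Equivalent def form of "f = lambda: list(conn.execute(sql))"; a def
    # gets a real name in tracebacks, which a lambda bound to a name lacks.
    def fetch_all(conn, sql):
        return list(conn.execute(sql))  # list() drains the cursor eagerly
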
28 changes: 14 additions & 14 deletions bench/bench_sparse.py
@@ -11,13 +11,13 @@
arr1 = np.arange(N)
index = Index(np.arange(N))

off = N//10
arr1[off : 2 * off] = np.NaN
arr1[4*off: 5 * off] = np.NaN
arr1[8*off: 9 * off] = np.NaN
off = N // 10
arr1[off: 2 * off] = np.NaN
arr1[4 * off: 5 * off] = np.NaN
arr1[8 * off: 9 * off] = np.NaN

arr2 = np.arange(N)
arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN

s1 = SparseSeries(arr1, index=index)
@@ -38,6 +38,7 @@

sdf = dm.to_sparse()


def new_data_like(sdf):
new_data = {}
for col, series in sdf.iteritems():
@@ -52,22 +53,22 @@ def new_data_like(sdf):
# for col, ser in dm.iteritems():
# data[col] = SparseSeries(ser)

dwp = Panel.fromDict({'foo' : dm})
dwp = Panel.fromDict({'foo': dm})
# sdf = SparseDataFrame(data)


lp = stack_sparse_frame(sdf)


swp = SparsePanel({'A' : sdf})
swp = SparsePanel({'A' : sdf,
'B' : sdf,
'C' : sdf,
'D' : sdf})
swp = SparsePanel({'A': sdf})
swp = SparsePanel({'A': sdf,
'B': sdf,
'C': sdf,
'D': sdf})

y = sdf
x = SparsePanel({'x1' : sdf + new_data_like(sdf) / 10,
'x2' : sdf + new_data_like(sdf) / 10})
x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10,
'x2': sdf + new_data_like(sdf) / 10})

dense_y = sdf
dense_x = x.to_dense()
@@ -89,4 +90,3 @@ def new_data_like(sdf):
reload(face)

# model = face.ols(y=y, x=x)
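The bench_sparse.py hunks are almost entirely operator- and slice-spacing fixes: N//10 becomes N // 10 and arr1[off : 2 * off] becomes arr1[off: 2 * off], i.e. spaces are added around binary operators and dropped before the slice colon. A standalone before/after pair (illustrative only, not from the PR):

    # Spacing fixes of the kind applied in bench_sparse.py above:
    N = 100000
    off = N//10    # before: pycodestyle E226, missing space around operator
    off = N // 10  # after: the form autopep8 produced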
