diff --git a/bench/bench_dense_to_sparse.py b/bench/bench_dense_to_sparse.py index 349d3b31e965f..f76daab5d8289 100644 --- a/bench/bench_dense_to_sparse.py +++ b/bench/bench_dense_to_sparse.py @@ -12,4 +12,3 @@ this_rng = rng2[:-i] data[100:] = np.nan series[i] = SparseSeries(data, index=this_rng) - diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py index 5aa984d39069a..419e8f603e5ae 100644 --- a/bench/bench_get_put_value.py +++ b/bench/bench_get_put_value.py @@ -4,32 +4,38 @@ N = 1000 K = 50 + def _random_index(howmany): return Index([rands(10) for _ in xrange(howmany)]) df = DataFrame(np.random.randn(N, K), index=_random_index(N), columns=_random_index(K)) + def get1(): for col in df.columns: for row in df.index: _ = df[col][row] + def get2(): for col in df.columns: for row in df.index: _ = df.get_value(row, col) + def put1(): for col in df.columns: for row in df.index: df[col][row] = 0 + def put2(): for col in df.columns: for row in df.index: df.set_value(row, col, 0) + def resize1(): buf = DataFrame() for col in df.columns: @@ -37,6 +43,7 @@ def resize1(): buf = buf.set_value(row, col, 5.) return buf + def resize2(): from collections import defaultdict diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py index 78e2c51abcbc1..807d3449e1fcb 100644 --- a/bench/bench_groupby.py +++ b/bench/bench_groupby.py @@ -12,16 +12,19 @@ random.shuffle(foo) random.shuffle(foo2) -df = DataFrame({'A' : foo, - 'B' : foo2, - 'C' : np.random.randn(n * k)}) +df = DataFrame({'A': foo, + 'B': foo2, + 'C': np.random.randn(n * k)}) import pandas._sandbox as sbx + def f(): table = sbx.StringHashTable(len(df)) ret = table.factorize(df['A']) return ret + + def g(): table = sbx.PyObjectHashTable(len(df)) ret = table.factorize(df['A']) diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index 59a4711c4b6d2..0e484fb496036 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -1,8 +1,9 @@ -# reasonably effecient +# reasonably efficient + def create_panels_append(cls, panels): """ return an append list of panels """ - panels = [ a for a in panels if a is not None ] + panels = [a for a in panels if a is not None] # corner cases if len(panels) == 0: return None @@ -10,18 +11,22 @@ def create_panels_append(cls, panels): return panels[0] elif len(panels) == 2 and panels[0] == panels[1]: return panels[0] - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() # create a joint index for the axis + def joint_index_for_axis(panels, axis): s = set() for p in panels: - s.update(list(getattr(p,axis))) + s.update(list(getattr(p, axis))) return sorted(list(s)) + def reindex_on_axis(panels, axis, axis_reindex): new_axis = joint_index_for_axis(panels, axis) - new_panels = [ p.reindex(**{ axis_reindex : new_axis, 'copy' : False}) for p in panels ] + new_panels = [p.reindex(**{axis_reindex: new_axis, + 'copy': False}) for p in panels] return new_panels, new_axis - # create the joint major index, dont' reindex the sub-panels - we are appending + # create the joint major index, don't reindex the sub-panels - we are + # appending major = joint_index_for_axis(panels, 'major_axis') # reindex on minor axis panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor') @@ -29,21 +34,22 @@ def reindex_on_axis(panels, axis, axis_reindex): panels, items = reindex_on_axis(panels, 'items', 'items') # concatenate values try: - values = np.concatenate([ p.values for p in panels ],axis=1) + values = np.concatenate([p.values for p in panels], axis=1) except (Exception), detail: - raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail))) - #pm('append - create_panel') - p = Panel(values, items = items, major_axis = major, minor_axis = minor ) - #pm('append - done') + raise Exception("cannot append values that don't match dimensions! -> [%s] %s" + % (','.join(["%s" % p for p in panels]), str(detail))) + # pm('append - create_panel') + p = Panel(values, items=items, major_axis=major, + minor_axis=minor) + # pm('append - done') return p - -# does the job but inefficient (better to handle like you read a table in pytables...e.g create a LongPanel then convert to Wide) - +# does the job but inefficient (better to handle like you read a table in +# pytables...e.g. create a LongPanel then convert to Wide) def create_panels_join(cls, panels): """ given an array of panels's, create a single panel """ - panels = [ a for a in panels if a is not None ] + panels = [a for a in panels if a is not None] # corner cases if len(panels) == 0: return None @@ -62,7 +68,7 @@ def create_panels_join(cls, panels): for minor_i, minor_index in panel.minor_axis.indexMap.items(): for major_i, major_index in panel.major_axis.indexMap.items(): try: - d[(minor_i,major_i,item)] = values[item_index,major_index,minor_index] + d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index] except: pass # stack the values @@ -70,8 +76,10 @@ def create_panels_join(cls, panels): major = sorted(list(major)) items = sorted(list(items)) # create the 3d stack (items x columns x indicies) - data = np.dstack([ np.asarray([ np.asarray([ d.get((minor_i,major_i,item),np.nan) for item in items ]) for major_i in major ]).transpose() for minor_i in minor ]) + data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) + for item in items]) + for major_i in major]).transpose() + for minor_i in minor]) # construct the panel return Panel(data, items, major, minor) add_class_method(Panel, create_panels_join, 'join_many') - diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py index 1d803bec8eab2..fce3288e3294d 100644 --- a/bench/bench_khash_dict.py +++ b/bench/bench_khash_dict.py @@ -16,12 +16,15 @@ pid = os.getpid() proc = psutil.Process(pid) + def object_test_data(n): pass + def string_test_data(n): return np.array([rands(10) for _ in xrange(n)], dtype='O') + def int_test_data(n): return np.arange(n, dtype='i8') @@ -30,17 +33,21 @@ def int_test_data(n): #---------------------------------------------------------------------- # Benchmark 1: map_locations + def map_locations_python_object(): arr = string_test_data(N) return _timeit(lambda: lib.map_indices_object(arr)) + def map_locations_khash_object(): arr = string_test_data(N) + def f(): table = sbx.PyObjectHashTable(len(arr)) table.map_locations(arr) return _timeit(f) + def _timeit(f, iterations=10): start = time.time() for _ in xrange(iterations): @@ -51,10 +58,12 @@ def _timeit(f, iterations=10): #---------------------------------------------------------------------- # Benchmark 2: lookup_locations + def lookup_python(values): table = lib.map_indices_object(values) return _timeit(lambda: lib.merge_indexer_object(values, table)) + def lookup_khash(values): table = sbx.PyObjectHashTable(len(values)) table.map_locations(values) @@ -62,6 +71,7 @@ def lookup_khash(values): # elapsed = _timeit(lambda: table.lookup_locations2(values)) return table + def leak(values): for _ in xrange(100): print proc.get_memory_info() @@ -75,4 +85,3 @@ def leak(values):
#---------------------------------------------------------------------- # Benchmark 4: factorize - diff --git a/bench/bench_merge.py b/bench/bench_merge.py index c9abd2c37c462..11f8c29a2897b 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -5,6 +5,7 @@ N = 10000 ngroups = 10 + def get_test_data(ngroups=100, n=N): unique_groups = range(ngroups) arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) @@ -38,10 +39,10 @@ def get_test_data(ngroups=100, n=N): key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) -left = DataFrame({'key' : key, 'key2':key2, - 'value' : np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:], - 'value2' : np.random.randn(8000)}) +left = DataFrame({'key': key, 'key2': key2, + 'value': np.random.randn(80000)}) +right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], + 'value2': np.random.randn(8000)}) right2 = right.append(right, ignore_index=True) @@ -78,7 +79,8 @@ def get_test_data(ngroups=100, n=N): all_results = all_results.div(all_results['pandas'], axis=0) -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']] +all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', + 'base::merge']] sort_results = DataFrame.from_items([('pandas', results['sort']), ('R', r_results['base::merge'])]) @@ -102,4 +104,5 @@ def get_test_data(ngroups=100, n=N): all_results = presults.join(r_results) all_results = all_results.div(all_results['pandas'], axis=0) -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']] +all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', + 'base::merge']] diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index a05a7c896b3d2..d13b296698b97 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -13,10 +13,10 @@ key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) -left = DataFrame({'key' : key, 'key2':key2, - 'value' : np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:], - 'value2' : np.random.randn(8000)}) +left = DataFrame({'key': key, 'key2': key2, + 'value': np.random.randn(80000)}) +right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], + 'value2': np.random.randn(8000)}) # right2 = right.append(right, ignore_index=True) # right = right2 @@ -30,8 +30,10 @@ create_sql_indexes = True conn = sqlite3.connect(':memory:') -conn.execute('create table left( key varchar(10), key2 varchar(10), value int);') -conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);') +conn.execute( + 'create table left( key varchar(10), key2 varchar(10), value int);') +conn.execute( + 'create table right( key varchar(10), key2 varchar(10), value2 int);') conn.executemany('insert into left values (?, ?, ?)', zip(key, key2, left['value'])) conn.executemany('insert into right values (?, ?, ?)', @@ -43,7 +45,7 @@ conn.execute('create index right_ix on right(key, key2)') -join_methods = ['inner', 'left outer', 'left'] # others not supported +join_methods = ['inner', 'left outer', 'left'] # others not supported sql_results = DataFrame(index=join_methods, columns=[False]) niter = 5 for sort in [False]: @@ -61,8 +63,8 @@ if sort: sql = '%s order by key, key2' % sql - f = lambda: list(conn.execute(sql)) # list fetches results - g = lambda: conn.execute(sql) # list fetches results + f = lambda: list(conn.execute(sql)) # list fetches results + g = lambda: conn.execute(sql) # list fetches 
results gc.disable() start = time.time() # for _ in xrange(niter): @@ -74,7 +76,7 @@ conn.commit() sql_results[sort][join_method] = elapsed - sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort'] + sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort'] sql_results.index = ['inner', 'outer', 'left'] sql = """select * diff --git a/bench/bench_sparse.py b/bench/bench_sparse.py index 4003415267b0c..600b3d05c5f78 100644 --- a/bench/bench_sparse.py +++ b/bench/bench_sparse.py @@ -11,13 +11,13 @@ arr1 = np.arange(N) index = Index(np.arange(N)) -off = N//10 -arr1[off : 2 * off] = np.NaN -arr1[4*off: 5 * off] = np.NaN -arr1[8*off: 9 * off] = np.NaN +off = N // 10 +arr1[off: 2 * off] = np.NaN +arr1[4 * off: 5 * off] = np.NaN +arr1[8 * off: 9 * off] = np.NaN arr2 = np.arange(N) -arr2[3 * off // 2: 2 * off + off // 2] = np.NaN +arr2[3 * off // 2: 2 * off + off // 2] = np.NaN arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN s1 = SparseSeries(arr1, index=index) @@ -38,6 +38,7 @@ sdf = dm.to_sparse() + def new_data_like(sdf): new_data = {} for col, series in sdf.iteritems(): @@ -52,22 +53,22 @@ def new_data_like(sdf): # for col, ser in dm.iteritems(): # data[col] = SparseSeries(ser) -dwp = Panel.fromDict({'foo' : dm}) +dwp = Panel.fromDict({'foo': dm}) # sdf = SparseDataFrame(data) lp = stack_sparse_frame(sdf) -swp = SparsePanel({'A' : sdf}) -swp = SparsePanel({'A' : sdf, - 'B' : sdf, - 'C' : sdf, - 'D' : sdf}) +swp = SparsePanel({'A': sdf}) +swp = SparsePanel({'A': sdf, + 'B': sdf, + 'C': sdf, + 'D': sdf}) y = sdf -x = SparsePanel({'x1' : sdf + new_data_like(sdf) / 10, - 'x2' : sdf + new_data_like(sdf) / 10}) +x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10, + 'x2': sdf + new_data_like(sdf) / 10}) dense_y = sdf dense_x = x.to_dense() @@ -89,4 +90,3 @@ def new_data_like(sdf): reload(face) # model = face.ols(y=y, x=x) - diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index fc8a3c6b743ea..3ddd647a35bf6 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -29,8 +29,9 @@ n = 1000 + def _timeit(stmt, size, k=5, iters=1000): - timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) + timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) return timer.timeit(n) / n for sz, its in zip(sizes, iters): @@ -39,9 +40,9 @@ def _timeit(stmt, size, k=5, iters=1000): take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) -df = DataFrame({'fancy' : fancy_2d, - 'take' : take_2d, - 'cython' : cython_2d}) +df = DataFrame({'fancy': fancy_2d, + 'take': take_2d, + 'cython': cython_2d}) print df diff --git a/bench/bench_unique.py b/bench/bench_unique.py index 3b5ece66deae6..392d3b326bf09 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -14,8 +14,10 @@ labels2 = np.tile(groups2, N // K) data = np.random.randn(N) + def timeit(f, niter): - import gc, time + import gc + import time gc.disable() start = time.time() for _ in xrange(niter): @@ -24,12 +26,14 @@ def timeit(f, niter): gc.enable() return elapsed + def algo1(): unique_labels = np.unique(labels) result = np.empty(len(unique_labels)) for i, label in enumerate(unique_labels): result[i] = data[labels == label].sum() + def algo2(): unique_labels = np.unique(labels) indices = lib.groupby_indices(labels) @@ -38,6 +42,7 @@ def algo2(): for i, label in enumerate(unique_labels): result[i] = data.take(indices[label]).sum() + def algo3_nosort(): rizer = lib.DictFactorizer() labs, counts = rizer.factorize(labels, 
sort=False) @@ -45,6 +50,7 @@ def algo3_nosort(): out = np.empty(k) lib.group_add(out, counts, data, labs) + def algo3_sort(): rizer = lib.DictFactorizer() labs, counts = rizer.factorize(labels, sort=True) @@ -67,6 +73,7 @@ def algo3_sort(): x = [int(y) for y in x] data = np.random.uniform(0, 1, 100000) + def f(): from itertools import izip # groupby sum @@ -76,6 +83,7 @@ def f(): except KeyError: counts[k] = v + def f2(): rizer = lib.DictFactorizer() labs, counts = rizer.factorize(xarr, sort=False) @@ -83,6 +91,7 @@ def f2(): out = np.empty(k) lib.group_add(out, counts, data, labs) + def algo4(): rizer = lib.DictFactorizer() labs1, _ = rizer.factorize(labels, sort=False) @@ -137,6 +146,7 @@ def algo4(): pid = os.getpid() proc = psutil.Process(pid) + def dict_unique(values, expected_K, sort=False, memory=False): if memory: gc.collect() @@ -154,6 +164,7 @@ def dict_unique(values, expected_K, sort=False, memory=False): assert(len(result) == expected_K) return result + def khash_unique(values, expected_K, size_hint=False, sort=False, memory=False): if memory: @@ -176,8 +187,9 @@ def khash_unique(values, expected_K, size_hint=False, sort=False, result.sort() assert(len(result) == expected_K) + def khash_unique_str(values, expected_K, size_hint=False, sort=False, - memory=False): + memory=False): if memory: gc.collect() before_mem = proc.get_memory_info().rss @@ -198,6 +210,7 @@ def khash_unique_str(values, expected_K, size_hint=False, sort=False, result.sort() assert(len(result) == expected_K) + def khash_unique_int64(values, expected_K, size_hint=False, sort=False): if size_hint: rizer = lib.Int64HashTable(len(values)) @@ -211,6 +224,7 @@ def khash_unique_int64(values, expected_K, size_hint=False, sort=False): result.sort() assert(len(result) == expected_K) + def hash_bench(): numpy = [] dict_based = [] @@ -248,9 +262,9 @@ def hash_bench(): # 'dict, sort', 'numpy.unique'], # index=Ks) - unique_timings = DataFrame({'dict' : dict_based, - 'khash, preallocate' : khash_hint, - 'khash' : khash_nohint}, + unique_timings = DataFrame({'dict': dict_based, + 'khash, preallocate': khash_hint, + 'khash': khash_nohint}, columns=['khash, preallocate', 'khash', 'dict'], index=Ks) @@ -260,5 +274,4 @@ def hash_bench(): plt.xlabel('Number of unique labels') plt.ylabel('Mean execution time') - plt.show() diff --git a/bench/better_unique.py b/bench/better_unique.py index 9ff4823cd628f..982dd88e879da 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -42,11 +42,11 @@ def get_test_data(ngroups=100, n=tot): for sz, n in zip(group_sizes, numbers): # wes_timer = timeit.Timer(stmt='better_unique(arr)', # setup=setup % sz) - wes_timer = timeit.Timer(stmt='_tseries.fast_unique(arr)', - setup=setup % sz) + wes_timer = timeit.Timer(stmt='_tseries.fast_unique(arr)', + setup=setup % sz) - numpy_timer = timeit.Timer(stmt='np.unique(arr)', - setup=setup % sz) + numpy_timer = timeit.Timer(stmt='np.unique(arr)', + setup=setup % sz) print n numpy_result = numpy_timer.timeit(number=n) / n @@ -57,7 +57,8 @@ def get_test_data(ngroups=100, n=tot): wes.append(wes_result) numpy.append(numpy_result) -result = DataFrame({'wes' : wes, 'numpy' : numpy}, index=group_sizes) +result = DataFrame({'wes': wes, 'numpy': numpy}, index=group_sizes) + def make_plot(numpy, wes): pass diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py index 6b86d2a6bd283..a9711dbb83b8a 100644 --- a/bench/io_roundtrip.py +++ b/bench/io_roundtrip.py @@ -1,10 +1,12 @@ -import time, os +import time +import os import numpy as np import la import 
pandas from pandas import datetools, DateRange + def timeit(f, iterations): start = time.clock() @@ -13,6 +15,7 @@ def timeit(f, iterations): return time.clock() - start + def rountrip_archive(N, K=50, iterations=10): # Create data arr = np.random.randn(N, K) @@ -75,12 +78,14 @@ def rountrip_archive(N, K=50, iterations=10): except: pass + def numpy_roundtrip(filename, arr1, arr2): np.savez(filename, arr1=arr1, arr2=arr2) npz = np.load(filename) arr1 = npz['arr1'] arr2 = npz['arr2'] + def larry_roundtrip(filename, lar1, lar2): io = la.IO(filename) io['lar1'] = lar1 @@ -88,6 +93,7 @@ def larry_roundtrip(filename, lar1, lar2): lar1 = io['lar1'] lar2 = io['lar2'] + def pandas_roundtrip(filename, dma1, dma2): # What's the best way to code this? from pandas.io.pytables import HDFStore @@ -97,6 +103,7 @@ def pandas_roundtrip(filename, dma1, dma2): dma1 = store['dma1'] dma2 = store['dma2'] + def pandas_roundtrip_pickle(filename, dma1, dma2): dma1.save(filename) dma1 = pandas.DataFrame.load(filename) diff --git a/bench/serialize.py b/bench/serialize.py index 29eecfc4cc419..63f885a4efa88 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,9 +1,11 @@ -import time, os +import time +import os import numpy as np import la import pandas + def timeit(f, iterations): start = time.clock() @@ -12,6 +14,7 @@ def timeit(f, iterations): return time.clock() - start + def roundtrip_archive(N, iterations=10): # Create data @@ -52,12 +55,14 @@ def roundtrip_archive(N, iterations=10): print 'larry (HDF5) %7.4f seconds' % larry_time print 'pandas (HDF5) %7.4f seconds' % pandas_time + def numpy_roundtrip(filename, arr1, arr2): np.savez(filename, arr1=arr1, arr2=arr2) npz = np.load(filename) arr1 = npz['arr1'] arr2 = npz['arr2'] + def larry_roundtrip(filename, lar1, lar2): io = la.IO(filename) io['lar1'] = lar1 @@ -65,6 +70,7 @@ def larry_roundtrip(filename, lar1, lar2): lar1 = io['lar1'] lar2 = io['lar2'] + def pandas_roundtrip(filename, dma1, dma2): from pandas.io.pytables import HDFStore store = HDFStore(filename) @@ -73,6 +79,7 @@ def pandas_roundtrip(filename, dma1, dma2): dma1 = store['dma1'] dma2 = store['dma2'] + def pandas_roundtrip_pickle(filename, dma1, dma2): dma1.save(filename) dma1 = pandas.DataFrame.load(filename) diff --git a/bench/test.py b/bench/test.py index 7fdf94fd1c62d..2ac91468d7b73 100644 --- a/bench/test.py +++ b/bench/test.py @@ -9,6 +9,7 @@ lon = np.random.randint(0, 360, N) data = np.random.randn(N) + def groupby1(lat, lon, data): indexer = np.lexsort((lon, lat)) lat = lat.take(indexer) @@ -25,6 +26,7 @@ def groupby1(lat, lon, data): return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) + def group_mean(lat, lon, data): indexer = np.lexsort((lon, lat)) lat = lat.take(indexer) @@ -39,15 +41,17 @@ def group_mean(lat, lon, data): return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) + def group_mean_naive(lat, lon, data): grouped = collections.defaultdict(list) for lt, ln, da in zip(lat, lon, data): - grouped[(lt, ln)].append(da) + grouped[(lt, ln)].append(da) averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) return averaged + def group_agg(values, bounds, f): N = len(values) result = np.empty(len(bounds), dtype=float) @@ -57,7 +61,7 @@ def group_agg(values, bounds, f): else: right_bound = bounds[i + 1] - result[i] = f(values[left_bound : right_bound]) + result[i] = f(values[left_bound: right_bound]) return result diff --git a/bench/zoo_bench.py b/bench/zoo_bench.py index 450d659cdc655..74cb1952a5a2a 100644 --- a/bench/zoo_bench.py +++ 
b/bench/zoo_bench.py @@ -3,6 +3,8 @@ n = 1000000 # indices = Index([rands(10) for _ in xrange(n)]) + + def sample(values, k): sampler = np.random.permutation(len(values)) return values.take(sampler[:k]) @@ -32,4 +34,3 @@ def sample(values, k): # df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) # df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) - diff --git a/doc/make.py b/doc/make.py index 5affd4b2414ed..adf34920b9ede 100755 --- a/doc/make.py +++ b/doc/make.py @@ -25,30 +25,35 @@ SPHINX_BUILD = 'sphinxbuild' + def upload_dev(): 'push a copy to the pydata dev directory' if os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'): raise SystemExit('Upload to Pydata Dev failed') + def upload_dev_pdf(): 'push a copy to the pydata dev directory' if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/dev/'): raise SystemExit('PDF upload to Pydata Dev failed') + def upload_stable(): 'push a copy to the pydata stable directory' if os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'): raise SystemExit('Upload to stable failed') + def upload_stable_pdf(): 'push a copy to the pydata dev directory' if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/stable/'): raise SystemExit('PDF upload to stable failed') + def upload_prev(ver, doc_root='./'): 'push a copy of older release to appropriate version directory' local_dir = doc_root + 'build/html' @@ -57,7 +62,8 @@ def upload_prev(ver, doc_root='./'): cmd = cmd % (local_dir, remote_dir) print cmd if os.system(cmd): - raise SystemExit('Upload to %s from %s failed' % (remote_dir, local_dir)) + raise SystemExit( + 'Upload to %s from %s failed' % (remote_dir, local_dir)) local_dir = doc_root + 'build/latex' pdf_cmd = 'cd %s; scp pandas.pdf pandas@pandas.pydata.org:%s' @@ -65,6 +71,7 @@ def upload_prev(ver, doc_root='./'): if os.system(pdf_cmd): raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root)) + def build_prev(ver): if os.system('git checkout v%s' % ver) != 1: os.chdir('..') @@ -76,6 +83,7 @@ def build_prev(ver): os.system('python make.py latex') os.system('git checkout master') + def clean(): if os.path.exists('build'): shutil.rmtree('build') @@ -83,12 +91,14 @@ def clean(): if os.path.exists('source/generated'): shutil.rmtree('source/generated') + def html(): check_build() if os.system('sphinx-build -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") + def latex(): check_build() if sys.platform != 'win32': @@ -108,6 +118,7 @@ def latex(): else: print('latex build has not been tested on windows') + def check_build(): build_dirs = [ 'build', 'build/doctrees', 'build/html', @@ -119,10 +130,12 @@ def check_build(): except OSError: pass + def all(): # clean() html() + def auto_dev_build(debug=False): msg = '' try: @@ -145,6 +158,7 @@ def auto_dev_build(debug=False): msg = str(inst) + '\n' sendmail(step, '[ERROR] ' + msg) + def sendmail(step=None, err_msg=None): from_name, to_name = _get_config() @@ -177,6 +191,7 @@ def sendmail(step=None, err_msg=None): finally: server.close() + def _get_dir(subdir=None): import getpass USERNAME = getpass.getuser() @@ -190,6 +205,7 @@ def _get_dir(subdir=None): conf_dir = '%s/%s' % (HOME, subdir) return conf_dir + def _get_credentials(): tmp_dir = _get_dir() cred = '%s/credentials' % tmp_dir 
@@ -204,6 +220,7 @@ def _get_credentials(): return server, port, login, pwd + def _get_config(): tmp_dir = _get_dir() with open('%s/addresses' % tmp_dir, 'r') as fh: @@ -211,17 +228,17 @@ def _get_config(): return from_name, to_name funcd = { - 'html' : html, - 'upload_dev' : upload_dev, - 'upload_stable' : upload_stable, - 'upload_dev_pdf' : upload_dev_pdf, - 'upload_stable_pdf' : upload_stable_pdf, - 'latex' : latex, - 'clean' : clean, - 'auto_dev' : auto_dev_build, - 'auto_debug' : lambda: auto_dev_build(True), - 'all' : all, - } + 'html': html, + 'upload_dev': upload_dev, + 'upload_stable': upload_stable, + 'upload_dev_pdf': upload_dev_pdf, + 'upload_stable_pdf': upload_stable_pdf, + 'latex': latex, + 'clean': clean, + 'auto_dev': auto_dev_build, + 'auto_debug': lambda: auto_dev_build(True), + 'all': all, +} small_docs = False @@ -240,10 +257,10 @@ def _get_config(): for arg in sys.argv[1:]: func = funcd.get(arg) if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s'%( - arg, funcd.keys())) + raise SystemExit('Do not know how to handle %s; valid args are %s' % ( + arg, funcd.keys())) func() else: small_docs = False all() -#os.chdir(current_dir) +# os.chdir(current_dir) diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py index 7c8b6fb56bf38..9e3a902592c6b 100644 --- a/doc/plots/stats/moment_plots.py +++ b/doc/plots/stats/moment_plots.py @@ -4,11 +4,13 @@ import pandas.util.testing as t import pandas.stats.moments as m + def test_series(n=1000): t.N = n s = t.makeTimeSeries() return s + def plot_timeseries(*args, **kwds): n = len(args) @@ -17,13 +19,12 @@ def plot_timeseries(*args, **kwds): titles = kwds.get('titles', None) for k in range(1, n + 1): - ax = axes[k-1] - ts = args[k-1] + ax = axes[k - 1] + ts = args[k - 1] ax.plot(ts.index, ts.values) if titles: - ax.set_title(titles[k-1]) + ax.set_title(titles[k - 1]) fig.autofmt_xdate() fig.subplots_adjust(bottom=0.10, top=0.95) - diff --git a/doc/plots/stats/moments_expw.py b/doc/plots/stats/moments_expw.py index 699b6cce7ca9f..5fff419b3a940 100644 --- a/doc/plots/stats/moments_expw.py +++ b/doc/plots/stats/moments_expw.py @@ -19,8 +19,10 @@ ax1.plot(s.index, m.rolling_mean(s, 50, min_periods=1).values, color='r') ax1.set_title('rolling_mean vs. ewma') -line1 = ax2.plot(s.index, m.ewmstd(s, span=50, min_periods=1).values, color='b') -line2 = ax2.plot(s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') +line1 = ax2.plot( + s.index, m.ewmstd(s, span=50, min_periods=1).values, color='b') +line2 = ax2.plot( + s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') ax2.set_title('rolling_std vs. ewmstd') fig.legend((line1, line2), diff --git a/doc/source/conf.py b/doc/source/conf.py index 692c7757ee17c..76093d83b32e7 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -10,12 +10,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys +import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-#sys.path.append(os.path.abspath('.')) +# sys.path.append(os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../sphinxext')) sys.path.extend([ @@ -27,7 +28,7 @@ ]) -# -- General configuration ----------------------------------------------------- +# -- General configuration ----------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sphinxext. @@ -56,7 +57,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' @@ -83,43 +84,43 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] -# -- Options for HTML output --------------------------------------------------- +# -- Options for HTML output --------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. @@ -128,31 +129,31 @@ # The style sheet to use for HTML and HTML Help pages. A file of that name # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. -#html_style = 'statsmodels.css' +# html_style = 'statsmodels.css' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. html_theme_path = ['themes'] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. 
-#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -161,82 +162,82 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'pandas' -# -- Options for LaTeX output -------------------------------------------------- +# -- Options for LaTeX output -------------------------------------------- # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'pandas.tex', - u'pandas: powerful Python data analysis toolkit', - u'Wes McKinney\n\& PyData Development Team', 'manual'), + ('index', 'pandas.tex', + u'pandas: powerful Python data analysis toolkit', + u'Wes McKinney\n\& PyData Development Team', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_use_modindex = True +# latex_use_modindex = True # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - 'statsmodels' : ('http://statsmodels.sourceforge.net/devel/', None), - 'python': ('http://docs.python.org/', None) - } + 'statsmodels': ('http://statsmodels.sourceforge.net/devel/', None), + 'python': ('http://docs.python.org/', None) +} import glob autosummary_generate = glob.glob("*.rst") @@ -244,6 +245,6 @@ extlinks = {'issue': ('https://github.com/pydata/pandas/issues/%s', 'issue '), 'pull request': ('https://github.com/pydata/pandas/pulls/%s', - 'pull request '), + 'pull request '), 'wiki': ('https://github.com/pydata/pandas/pulls/%s', - 'wiki ')} + 'wiki ')} diff --git a/examples/finance.py b/examples/finance.py index 639a80afbef8e..24aa337a84024 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -16,16 +16,17 @@ startDate = datetime(2008, 1, 1) endDate = datetime(2009, 9, 1) + def getQuotes(symbol, start, end): quotes = fin.quotes_historical_yahoo(symbol, start, end) dates, open, close, high, low, volume = zip(*quotes) data = { - 'open' : open, - 'close' : close, - 'high' : high, - 'low' : low, - 'volume' : volume + 'open': open, + 'close': close, + 'high': high, + 'low': low, + 'volume': volume } dates = Index([datetime.fromordinal(int(d)) for d in dates]) @@ -36,10 +37,10 @@ def getQuotes(symbol, start, end): goog = getQuotes('GOOG', startDate, endDate) ibm = getQuotes('IBM', startDate, endDate) -px = DataFrame({'MSFT' : msft['close'], - 'IBM' : ibm['close'], - 'GOOG' : goog['close'], - 'AAPL' : aapl['close']}) +px = DataFrame({'MSFT': msft['close'], + 'IBM': ibm['close'], + 'GOOG': goog['close'], + 'AAPL': aapl['close']}) returns = px / px.shift(1) - 1 # Select dates @@ -54,6 +55,7 @@ def getQuotes(symbol, start, end): # Aggregate monthly + def toMonthly(frame, how): offset = BMonthEnd() @@ -65,8 +67,8 @@ def toMonthly(frame, how): # Statistics stdev = DataFrame({ - 'MSFT' : msft.std(), - 'IBM' : ibm.std() + 'MSFT': msft.std(), + 'IBM': ibm.std() }) # Arithmetic diff --git a/examples/regressions.py b/examples/regressions.py index e78ff90a22687..2d21a0ece58c3 100644 --- a/examples/regressions.py +++ b/examples/regressions.py @@ -11,13 +11,15 @@ start = datetime(2009, 9, 2) dateRange = DateRange(start, periods=N) + def makeDataFrame(): data = DataFrame(np.random.randn(N, 7), - columns=list(string.ascii_uppercase[:7]), - index=dateRange) + columns=list(string.ascii_uppercase[:7]), + index=dateRange) return data + def makeSeries(): return Series(np.random.randn(N), index=dateRange) @@ -25,7 +27,7 @@ def makeSeries(): # Standard rolling linear regression X = makeDataFrame() -Y = makeSeries() +Y = makeSeries() model = ols(y=Y, x=X) @@ -35,9 +37,9 @@ def makeSeries(): # Panel regression data = { - 'A' : makeDataFrame(), - 'B' : makeDataFrame(), - 'C' : makeDataFrame() + 'A': makeDataFrame(), + 'B': makeDataFrame(), + 'C': makeDataFrame() } Y = makeDataFrame() diff --git a/ez_setup.py b/ez_setup.py index 1ff1d3e7a6839..de65d3c1f0375 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -15,7 +15,8 @@ """ import sys DEFAULT_VERSION = "0.6c11" -DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3] +DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[ + :3] md5_data = { 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', @@ -62,9 +63,13 @@ 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a', } -import sys, os -try: from hashlib import md5 -except ImportError: from md5 import md5 +import sys +import os +try: + from hashlib import md5 +except ImportError: + from md5 import md5 + def 
_validate_md5(egg_name, data): if egg_name in md5_data: @@ -77,6 +82,7 @@ def _validate_md5(egg_name, data): sys.exit(2) return data + def use_setuptools( version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, download_delay=15 @@ -93,23 +99,27 @@ def use_setuptools( an attempt to abort the calling script. """ was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules + def do_download(): - egg = download_setuptools(version, download_base, to_dir, download_delay) + egg = download_setuptools( + version, download_base, to_dir, download_delay) sys.path.insert(0, egg) - import setuptools; setuptools.bootstrap_install_from = egg + import setuptools + setuptools.bootstrap_install_from = egg try: import pkg_resources except ImportError: - return do_download() + return do_download() try: - pkg_resources.require("setuptools>="+version); return + pkg_resources.require("setuptools>=" + version) + return except pkg_resources.VersionConflict, e: if was_imported: print >>sys.stderr, ( - "The required version of setuptools (>=%s) is not available, and\n" - "can't be installed while this script is running. Please install\n" - " a more recent version first, using 'easy_install -U setuptools'." - "\n\n(Currently using %r)" + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first, using 'easy_install -U setuptools'." + "\n\n(Currently using %r)" ) % (version, e.args[0]) sys.exit(2) else: @@ -118,9 +128,10 @@ def do_download(): except pkg_resources.DistributionNotFound: return do_download() + def download_setuptools( version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, - delay = 15 + delay=15 ): """Download setuptools from a specified location and return its filename @@ -129,8 +140,9 @@ def download_setuptools( with a '/'). `to_dir` is the directory where the egg will be downloaded. `delay` is the number of seconds to pause before an actual download attempt. """ - import urllib2, shutil - egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + import urllib2 + import shutil + egg_name = "setuptools-%s-py%s.egg" % (version, sys.version[:3]) url = download_base + egg_name saveto = os.path.join(to_dir, egg_name) src = dst = None @@ -152,54 +164,25 @@ def download_setuptools( and place it in this directory before rerunning this script.) ---------------------------------------------------------------------------""", - version, download_base, delay, url - ); from time import sleep; sleep(delay) + version, download_base, delay, url + ) + from time import sleep + sleep(delay) log.warn("Downloading %s", url) src = urllib2.urlopen(url) # Read/write all in one block, so we don't create a corrupt file # if the download is interrupted. 
data = _validate_md5(egg_name, src.read()) - dst = open(saveto,"wb"); dst.write(data) + dst = open(saveto, "wb") + dst.write(data) finally: - if src: src.close() - if dst: dst.close() + if src: + src.close() + if dst: + dst.close() return os.path.realpath(saveto) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - def main(argv, version=DEFAULT_VERSION): """Install or upgrade setuptools and EasyInstall""" try: @@ -208,21 +191,21 @@ def main(argv, version=DEFAULT_VERSION): egg = None try: egg = download_setuptools(version, delay=0) - sys.path.insert(0,egg) + sys.path.insert(0, egg) from setuptools.command.easy_install import main - return main(list(argv)+[egg]) # we're done here + return main(list(argv) + [egg]) # we're done here finally: if egg and os.path.exists(egg): os.unlink(egg) else: if setuptools.__version__ == '0.0.1': print >>sys.stderr, ( - "You have an obsolete version of setuptools installed. Please\n" - "remove it from your system entirely before rerunning this script." + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." ) sys.exit(2) - req = "setuptools>="+version + req = "setuptools>=" + version import pkg_resources try: pkg_resources.require(req) @@ -231,16 +214,17 @@ def main(argv, version=DEFAULT_VERSION): from setuptools.command.easy_install import main except ImportError: from easy_install import main - main(list(argv)+[download_setuptools(delay=0)]) - sys.exit(0) # try to force an exit + main(list(argv) + [download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit else: if argv: from setuptools.command.easy_install import main main(argv) else: - print "Setuptools version",version,"or greater has been installed." + print "Setuptools version", version, "or greater has been installed." print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + def update_md5(filenames): """Update our built-in md5 registry""" @@ -248,7 +232,7 @@ def update_md5(filenames): for name in filenames: base = os.path.basename(name) - f = open(name,'rb') + f = open(name, 'rb') md5_data[base] = md5(f.read()).hexdigest() f.close() @@ -258,7 +242,9 @@ def update_md5(filenames): import inspect srcfile = inspect.getsourcefile(sys.modules[__name__]) - f = open(srcfile, 'rb'); src = f.read(); f.close() + f = open(srcfile, 'rb') + src = f.read() + f.close() match = re.search("\nmd5_data = {\n([^}]+)}", src) if not match: @@ -266,19 +252,13 @@ def update_md5(filenames): sys.exit(2) src = src[:match.start(1)] + repl + src[match.end(1):] - f = open(srcfile,'w') + f = open(srcfile, 'w') f.write(src) f.close() -if __name__=='__main__': - if len(sys.argv)>2 and sys.argv[1]=='--md5update': +if __name__ == '__main__': + if len(sys.argv) > 2 and sys.argv[1] == '--md5update': update_md5(sys.argv[2:]) else: main(sys.argv[1:]) - - - - - - diff --git a/pandas/__init__.py b/pandas/__init__.py index 6c58c708b8306..5780ddfbe1fdc 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -6,7 +6,7 @@ from . 
import hashtable, tslib, lib except Exception: # pragma: no cover import sys - e = sys.exc_info()[1] # Py25 and Py3 current exception syntax conflict + e = sys.exc_info()[1] # Py25 and Py3 current exception syntax conflict print e if 'No module named lib' in str(e): raise ImportError('C extensions not built: if you installed already ' diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 9f021a01ebce3..aab8bd89c5af4 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -63,7 +63,7 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'): if limit: values = values[(limit[0] <= values) & (values <= limit[1])] - idx = per /100. * (values.shape[0] - 1) + idx = per / 100. * (values.shape[0] - 1) if (idx % 1 == 0): score = values[idx] else: @@ -75,7 +75,7 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'): elif interpolation_method == 'higher': score = values[np.ceil(idx)] else: - raise ValueError("interpolation_method can only be 'fraction', " \ + raise ValueError("interpolation_method can only be 'fraction', " "'lower' or 'higher'") return score @@ -85,7 +85,7 @@ def _interpolate(a, b, fraction): """Returns the point at the given fraction between a and b, where 'fraction' must be between 0 and 1. """ - return a + (b - a)*fraction + return a + (b - a) * fraction def rankdata(a): @@ -121,9 +121,9 @@ def rankdata(a): for i in xrange(n): sumranks += i dupcount += 1 - if i==n-1 or svec[i] != svec[i+1]: + if i == n - 1 or svec[i] != svec[i + 1]: averank = sumranks / float(dupcount) + 1 - for j in xrange(i-dupcount+1,i+1): + for j in xrange(i - dupcount + 1, i + 1): newarray[ivec[j]] = averank sumranks = 0 dupcount = 0 diff --git a/pandas/core/api.py b/pandas/core/api.py index 7820a94fdf4b1..306f9aff8f4d3 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -27,8 +27,8 @@ from pandas.tseries.period import Period, PeriodIndex # legacy -from pandas.core.daterange import DateRange # deprecated +from pandas.core.daterange import DateRange # deprecated import pandas.core.datetools as datetools -from pandas.core.config import get_option,set_option,reset_option,\ - describe_option, options +from pandas.core.config import get_option, set_option, reset_option,\ + describe_option, options diff --git a/pandas/core/common.py b/pandas/core/common.py index 869cc513d0aaf..6fc490e133905 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -53,6 +53,7 @@ def isnull(obj): ''' return _isnull(obj) + def _isnull_new(obj): if lib.isscalar(obj): return lib.checknull(obj) @@ -68,6 +69,7 @@ def _isnull_new(obj): else: return obj is None + def _isnull_old(obj): ''' Detect missing values. Treat None, NaN, INF, -INF as null. @@ -96,6 +98,7 @@ def _isnull_old(obj): _isnull = _isnull_new + def _use_inf_as_null(key): '''Option change callback for null/inf behaviour Choose which replacement for numpy.isnan / -numpy.isfinite is used. 
@@ -116,13 +119,12 @@ def _use_inf_as_null(key): programmatically-creating-variables-in-python/4859312#4859312 ''' flag = get_option(key) - if flag == True: + if flag: globals()['_isnull'] = _isnull_old else: globals()['_isnull'] = _isnull_new - def _isnull_ndarraylike(obj): from pandas import Series values = np.asarray(obj) @@ -176,6 +178,7 @@ def _isnull_ndarraylike_old(obj): result = -np.isfinite(obj) return result + def notnull(obj): ''' Replacement for numpy.isfinite / -numpy.isnan which is suitable @@ -278,7 +281,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan): 'object': algos.take_2d_axis1_object, 'bool': _view_wrapper(algos.take_2d_axis1_bool, np.uint8), 'datetime64[ns]': _view_wrapper(algos.take_2d_axis1_int64, np.int64, - na_override=tslib.iNaT), + na_override=tslib.iNaT), } _take2d_multi_dict = { @@ -458,6 +461,7 @@ def mask_out_axis(arr, mask, axis, fill_value=np.nan): 'int32': algos.diff_2d_int32 } + def diff(arr, n, axis=0): n = int(n) dtype = arr.dtype @@ -628,6 +632,7 @@ def _consensus_name_attr(objs): #---------------------------------------------------------------------- # Lots of little utilities + def _possibly_cast_to_datetime(value, dtype): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -648,9 +653,10 @@ def _possibly_cast_to_datetime(value, dtype): value = tslib.array_to_datetime(value) except: pass - + return value + def _infer_dtype(value): if isinstance(value, (float, np.floating)): return np.float_ @@ -671,6 +677,7 @@ def _possibly_cast_item(obj, item, dtype): elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover raise ValueError("Unexpected dtype encountered: %s" % dtype) + def _is_bool_indexer(key): if isinstance(key, np.ndarray) and key.dtype == np.object_: key = np.asarray(key) @@ -691,6 +698,7 @@ def _is_bool_indexer(key): return False + def _default_index(n): from pandas.core.index import Int64Index values = np.arange(n, dtype=np.int64) @@ -805,24 +813,26 @@ def iterpairs(seq): return itertools.izip(seq_it, seq_it_next) + def split_ranges(mask): """ Generates tuples of ranges which cover all True value in mask >>> list(split_ranges([1,0,0,1,0])) [(0, 1), (3, 4)] """ - ranges = [(0,len(mask))] + ranges = [(0, len(mask))] - for pos,val in enumerate(mask): - if not val: # this pos should be ommited, split off the prefix range + for pos, val in enumerate(mask): + if not val:  # this pos should be omitted, split off the prefix range r = ranges.pop() - if pos > r[0]: # yield non-zero range - yield (r[0],pos) - if pos+1 < len(mask): # save the rest for processing - ranges.append((pos+1,len(mask))) + if pos > r[0]:  # yield non-zero range + yield (r[0], pos) + if pos + 1 < len(mask):  # save the rest for processing + ranges.append((pos + 1, len(mask))) if ranges: yield ranges[-1] + def indent(string, spaces=4): dent = ' ' * spaces return '\n'.join([dent + x for x in string.split('\n')]) @@ -972,6 +982,7 @@ def is_integer_dtype(arr_or_dtype): (issubclass(tipo, np.datetime64) or issubclass(tipo, np.timedelta64))) + def _is_int_or_datetime_dtype(arr_or_dtype): # also timedelta64 if isinstance(arr_or_dtype, np.dtype): @@ -980,6 +991,7 @@ def _is_int_or_datetime_dtype(arr_or_dtype): tipo = arr_or_dtype.dtype.type return issubclass(tipo, np.integer) + def is_datetime64_dtype(arr_or_dtype): if isinstance(arr_or_dtype, np.dtype): tipo = arr_or_dtype.type @@ -1022,7 +1034,7 @@ def _astype_nansafe(arr, dtype): if dtype == object: return tslib.ints_to_pydatetime(arr.view(np.int64)) elif
(np.issubdtype(arr.dtype, np.floating) and - np.issubdtype(dtype, np.integer)): + np.issubdtype(dtype, np.integer)): if np.isnan(arr).any(): raise ValueError('Cannot convert NA to integer') @@ -1091,8 +1103,6 @@ def load(path): f.close() - - class UTF8Recoder: """ Iterator that reads an encoded stream and reencodes the input to UTF-8 @@ -1214,6 +1224,7 @@ def _concat_compat(to_concat, axis=0): else: return np.concatenate(to_concat, axis=axis) + def in_interactive_session(): """ check if we're running in an interactive shell @@ -1229,6 +1240,7 @@ def check_main(): except: return check_main() + def in_qtconsole(): """ check if we're inside an IPython qtconsole @@ -1277,6 +1289,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): fmt = u"[%s]" if hasattr(seq, '__setitem__') else u"(%s)" return fmt % ", ".join(pprint_thing(e, _nest_lvl + 1, **kwds) for e in seq) + def _pprint_dict(seq, _nest_lvl=0): """ internal. pprinter for iterables. you should probably use pprint_thing() @@ -1314,14 +1327,14 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None): if thing is None: result = '' - elif (py3compat.PY3 and hasattr(thing,'__next__')) or \ - hasattr(thing,'next'): + elif (py3compat.PY3 and hasattr(thing, '__next__')) or \ + hasattr(thing, 'next'): return unicode(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl) elif _is_sequence(thing) and _nest_lvl < \ - get_option("display.pprint_nest_depth"): + get_option("display.pprint_nest_depth"): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars) else: # when used internally in the package, everything @@ -1344,14 +1357,14 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None): } escape_chars = escape_chars or tuple() for c in escape_chars: - result=result.replace(c,translate[c]) + result = result.replace(c, translate[c]) return unicode(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object - return value.encode(encoding, errors,**kwds) + return value.encode(encoding, errors, **kwds) def console_encode(object, **kwds): @@ -1363,4 +1376,4 @@ def console_encode(object, **kwds): where you output to the console. 
""" return pprint_thing_encoded(object, - get_option("display.encoding")) + get_option("display.encoding")) diff --git a/pandas/core/config.py b/pandas/core/config.py index 2d5652690388b..aca01adb3c58d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -56,7 +56,8 @@ import warnings DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') -RegisteredOption = namedtuple('RegisteredOption', 'key defval doc validator cb') +RegisteredOption = namedtuple( + 'RegisteredOption', 'key defval doc validator cb') _deprecated_options = {} # holds deprecated option metdata _registered_options = {} # holds registered option metdata @@ -84,8 +85,9 @@ def _get_single_key(pat, silent): return key + def _get_option(pat, silent=False): - key = _get_single_key(pat,silent) + key = _get_single_key(pat, silent) # walk the nested dict root, k = _get_root(key) @@ -93,7 +95,7 @@ def _get_option(pat, silent=False): def _set_option(pat, value, silent=False): - key = _get_single_key(pat,silent) + key = _get_single_key(pat, silent) o = _get_registered_option(key) if o and o.validator: @@ -138,33 +140,34 @@ def _reset_option(pat): for k in keys: _set_option(k, _registered_options[k].defval) + class DictWrapper(object): """ provide attribute-style access to a nested dict """ - def __init__(self,d,prefix=""): - object.__setattr__(self,"d",d) - object.__setattr__(self,"prefix",prefix) + def __init__(self, d, prefix=""): + object.__setattr__(self, "d", d) + object.__setattr__(self, "prefix", prefix) - def __setattr__(self,key,val): - prefix = object.__getattribute__(self,"prefix") + def __setattr__(self, key, val): + prefix = object.__getattribute__(self, "prefix") if prefix: prefix += "." prefix += key # you can't set new keys # can you can't overwrite subtrees - if key in self.d and not isinstance(self.d[key],dict): - _set_option(prefix,val) + if key in self.d and not isinstance(self.d[key], dict): + _set_option(prefix, val) else: raise KeyError("You can only set the value of existing options") - def __getattr__(self,key): - prefix = object.__getattribute__(self,"prefix") + def __getattr__(self, key): + prefix = object.__getattribute__(self, "prefix") if prefix: prefix += "." prefix += key - v=object.__getattribute__(self,"d")[key] - if isinstance(v,dict): - return DictWrapper(v,prefix) + v = object.__getattribute__(self, "d")[key] + if isinstance(v, dict): + return DictWrapper(v, prefix) else: return _get_option(prefix) @@ -179,6 +182,7 @@ def __dir__(self): # using the py2.6+ advanced formatting syntax to plug in a concise list # of options, and option descriptions. + class CallableDyanmicDoc(object): def __init__(self, func, doc_tmpl): @@ -193,7 +197,7 @@ def __doc__(self): opts_desc = _describe_option('all', _print_desc=False) opts_list = pp_options_list(_registered_options.keys()) return self.__doc_tmpl__.format(opts_desc=opts_desc, - opts_list=opts_list) + opts_list=opts_list) _get_option_tmpl = """"get_option(pat) - Retrieves the value of the specified option @@ -219,7 +223,7 @@ def __doc__(self): {opts_desc} """ -_set_option_tmpl="""set_option(pat,value) - Sets the value of the specified option +_set_option_tmpl = """set_option(pat,value) - Sets the value of the specified option Available options: {opts_list} @@ -245,7 +249,7 @@ def __doc__(self): {opts_desc} """ -_describe_option_tmpl="""describe_option(pat,_print_desc=False) Prints the description +_describe_option_tmpl = """describe_option(pat,_print_desc=False) Prints the description for one or more registered options. 
Call with not arguments to get a listing for all registered options. @@ -270,7 +274,7 @@ def __doc__(self): {opts_desc} """ -_reset_option_tmpl="""reset_option(pat) - Reset one or more options to their default value. +_reset_option_tmpl = """reset_option(pat) - Reset one or more options to their default value. Pass "all" as argument to reset all options. @@ -303,19 +307,20 @@ def __doc__(self): ###################################################### # Functions for use by pandas developers, in addition to User - api + class option_context(object): - def __init__(self,*args): - assert len(args) % 2 == 0 and len(args)>=2, \ - "Need to invoke as option_context(pat,val,[(pat,val),..))." - ops = zip(args[::2],args[1::2]) - undo=[] - for pat,val in ops: - undo.append((pat,_get_option(pat,silent=True))) + def __init__(self, *args): + assert len(args) % 2 == 0 and len(args) >= 2, \ + "Need to invoke as option_context(pat,val,[(pat,val),..))." + ops = zip(args[::2], args[1::2]) + undo = [] + for pat, val in ops: + undo.append((pat, _get_option(pat, silent=True))) self.undo = undo - for pat,val in ops: - _set_option(pat,val,silent=True) + for pat, val in ops: + _set_option(pat, val, silent=True) def __enter__(self): pass @@ -325,6 +330,7 @@ def __exit__(self, *args): for pat, val in self.undo: _set_option(pat, val) + def register_option(key, defval, doc='', validator=None, cb=None): """Register an option in the package-wide pandas config object @@ -348,7 +354,8 @@ def register_option(key, defval, doc='', validator=None, cb=None): ValueError if `validator` is specified and `defval` is not a valid value. """ - import tokenize, keyword + import tokenize + import keyword key = key.lower() if key in _registered_options: @@ -364,7 +371,7 @@ def register_option(key, defval, doc='', validator=None, cb=None): path = key.split('.') for k in path: - if not bool(re.match('^'+tokenize.Name+'$', k)): + if not bool(re.match('^' + tokenize.Name + '$', k)): raise ValueError("%s is not a valid identifier" % k) if keyword.iskeyword(key): raise ValueError("%s is a python keyword" % k) @@ -374,7 +381,7 @@ def register_option(key, defval, doc='', validator=None, cb=None): if not isinstance(cursor, dict): raise KeyError("Path prefix to option '%s' is already an option" % '.'.join(path[:i])) - if not cursor.has_key(p): + if p not in cursor: cursor[p] = {} cursor = cursor[p] @@ -382,12 +389,11 @@ def register_option(key, defval, doc='', validator=None, cb=None): raise KeyError("Path prefix to option '%s' is already an option" % '.'.join(path[:-1])) - cursor[path[-1]] = defval # initialize # save the option metadata _registered_options[key] = RegisteredOption(key=key, defval=defval, - doc=doc, validator=validator,cb=cb) + doc=doc, validator=validator, cb=cb) def deprecate_option(key, msg=None, rkey=None, removal_ver=None): @@ -466,7 +472,7 @@ def _is_deprecated(key): """ Returns True if the given option has been deprecated """ key = key.lower() - return _deprecated_options.has_key(key) + return key in _deprecated_options def _get_deprecated_option(key): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index aaf0584174f7e..11b5afcf39a52 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -1,5 +1,5 @@ import pandas.core.config as cf -from pandas.core.config import is_int,is_bool,is_text,is_float +from pandas.core.config import is_int, is_bool, is_text, is_float from pandas.core.format import detect_console_encoding """ @@ -18,25 +18,25 @@ ########################################### # 
 ###########################################
 # options from the "display" namespace

-pc_precision_doc="""
+pc_precision_doc = """
 : int
     Floating point output precision (number of significant digits). This is
     only a suggestion
 """

-pc_colspace_doc="""
+pc_colspace_doc = """
 : int
     Default space for DataFrame columns, defaults to 12
 """

-pc_max_rows_doc="""
+pc_max_rows_doc = """
 : int
     This sets the maximum number of rows pandas should output when printing
     out various output. For example, this value determines whether the repr()
     for a dataframe prints out fully or just a summary repr.
 """

-pc_max_cols_doc="""
+pc_max_cols_doc = """
 : int
     max_rows and max_columns are used in __repr__() methods to decide if
     to_string() or info() is used to render an object to a string.
@@ -45,48 +45,48 @@
     columns that can fit on it.
 """

-pc_max_info_cols_doc="""
+pc_max_info_cols_doc = """
 : int
     max_info_columns is used in DataFrame.info method to decide if
     per column information will be printed.
 """

-pc_nb_repr_h_doc="""
+pc_nb_repr_h_doc = """
 : boolean
     When True (default), IPython notebook will use html representation for
     pandas objects (if it is available).
 """

-pc_date_dayfirst_doc="""
+pc_date_dayfirst_doc = """
 : boolean
     When True, prints and parses dates with the day first, eg 20/01/2005
 """

-pc_date_yearfirst_doc="""
+pc_date_yearfirst_doc = """
 : boolean
     When True, prints and parses dates with the year first, eg 2005/01/20
 """

-pc_pprint_nest_depth="""
+pc_pprint_nest_depth = """
 : int
     Defaults to 3.
     Controls the number of nested levels to process when pretty-printing
 """

-pc_multi_sparse_doc="""
+pc_multi_sparse_doc = """
 : boolean
     Default True, "sparsify" MultiIndex display (don't display repeated
     elements in outer levels within groups)
 """

-pc_encoding_doc="""
+pc_encoding_doc = """
 : str/unicode
     Defaults to the detected encoding of the console.
     Specifies the encoding to be used for strings returned by to_string,
     these are generally strings meant to be displayed on the console.
 """

-float_format_doc="""
+float_format_doc = """
 : callable
     The callable should accept a floating point number and return
     a string with the desired format of the number. This is used
@@ -95,19 +95,19 @@
 """

-max_colwidth_doc="""
+max_colwidth_doc = """
 : int
     The maximum width in characters of a column in the repr of
     a pandas data structure. When the column overflows, a "..."
     placeholder is embedded in the output.
 """

-colheader_justify_doc="""
+colheader_justify_doc = """
 : 'left'/'right'
     Controls the justification of column headers. Used by DataFrameFormatter.
 """

-pc_expand_repr_doc="""
+pc_expand_repr_doc = """
 : boolean
     Default False
     Whether to print out the full DataFrame repr for wide DataFrames
@@ -115,7 +115,7 @@
     If False, the summary representation is shown.
 """

-pc_line_width_doc="""
+pc_line_width_doc = """
 : int
     Default 80
     When printing wide DataFrames, this is the width of each line.
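
The docstrings above feed the cf.register_option calls that follow; once
registered, an option is addressed by its dotted key. A short sketch of the
consumer side (assuming the module-level get_option/set_option/reset_option
wrappers that the *_tmpl docstrings in pandas.core.config describe):

    import pandas.core.config as cf

    cf.set_option('display.max_rows', 200)    # raise the repr row limit
    print cf.get_option('display.precision')  # inspect the current value
    cf.reset_option('display.max_rows')       # back to the registered defval
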
@@ -143,11 +143,11 @@ cf.register_option('multi_sparse', True, pc_multi_sparse_doc, validator=is_bool) cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, - validator=is_text) + validator=is_text) cf.register_option('expand_frame_repr', True, pc_expand_repr_doc) cf.register_option('line_width', 80, pc_line_width_doc) -tc_sim_interactive_doc=""" +tc_sim_interactive_doc = """ : boolean Default False Whether to simulate interactive mode for purposes of testing @@ -155,7 +155,7 @@ with cf.config_prefix('mode'): cf.register_option('sim_interactive', False, tc_sim_interactive_doc) -use_inf_as_null_doc=""" +use_inf_as_null_doc = """ : boolean True means treat None, NaN, INF, -INF as null (old way), False means None and NaN are null, but INF, -INF are not null @@ -164,6 +164,8 @@ # we don't want to start importing evrything at the global context level # or we'll hit circular deps. + + def use_inf_as_null_cb(key): from pandas.core.common import _use_inf_as_null _use_inf_as_null(key) diff --git a/pandas/core/daterange.py b/pandas/core/daterange.py index bfed7fcc6a734..954d72defdbbb 100644 --- a/pandas/core/daterange.py +++ b/pandas/core/daterange.py @@ -18,7 +18,7 @@ def __new__(cls, start=None, end=None, periods=None, import warnings warnings.warn("DateRange is deprecated, use DatetimeIndex instead", - FutureWarning) + FutureWarning) if time_rule is None: time_rule = kwds.get('timeRule') diff --git a/pandas/core/format.py b/pandas/core/format.py index 862dea1ba5ed4..cc931545e6bd0 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -89,8 +89,9 @@ def _get_footer(self): footer += ', ' series_name = com.pprint_thing(self.series.name, - escape_chars=('\t','\r','\n')) - footer += ("Name: %s" % series_name) if self.series.name is not None else "" + escape_chars=('\t', '\r', '\n')) + footer += ("Name: %s" % + series_name) if self.series.name is not None else "" if self.length: if footer: @@ -153,6 +154,7 @@ def _encode_diff_func(): _encode_diff = lambda x: 0 else: encoding = get_option("display.encoding") + def _encode_diff(x): return len(x) - len(x.decode(encoding)) @@ -164,6 +166,7 @@ def _strlen_func(): _strlen = len else: encoding = get_option("display.encoding") + def _strlen(x): try: return len(x.decode(encoding)) @@ -172,8 +175,8 @@ def _strlen(x): return _strlen -class TableFormatter(object): +class TableFormatter(object): def _get_formatter(self, i): if isinstance(self.formatters, (list, tuple)): @@ -283,8 +286,9 @@ def to_string(self, force_unicode=None): """ import warnings if force_unicode is not None: # pragma: no cover - warnings.warn("force_unicode is deprecated, it will have no effect", - FutureWarning) + warnings.warn( + "force_unicode is deprecated, it will have no effect", + FutureWarning) frame = self.frame @@ -337,8 +341,9 @@ def to_latex(self, force_unicode=None, column_format=None): """ import warnings if force_unicode is not None: # pragma: no cover - warnings.warn("force_unicode is deprecated, it will have no effect", - FutureWarning) + warnings.warn( + "force_unicode is deprecated, it will have no effect", + FutureWarning) frame = self.frame @@ -400,7 +405,7 @@ def is_numeric_dtype(dtype): str_columns = zip(*[[' ' + y if y not in self.formatters and need_leadsp[x] else y for y in x] - for x in fmt_columns]) + for x in fmt_columns]) if self.sparsify: str_columns = _sparsify(str_columns) @@ -498,8 +503,8 @@ def write(self, s, indent=0): def write_th(self, s, indent=0, tags=None): if (self.fmt.col_space is not None - and self.fmt.col_space > 0 ): 
- tags = (tags or "" ) + and self.fmt.col_space > 0): + tags = (tags or "") tags += 'style="min-width: %s;"' % self.fmt.col_space return self._write_cell(s, kind='th', indent=indent, tags=tags) @@ -512,7 +517,8 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): start_tag = '<%s %s>' % (kind, tags) else: start_tag = '<%s>' % kind - self.write('%s%s' % (start_tag, com.pprint_thing(s), kind), indent) + self.write( + '%s%s' % (start_tag, com.pprint_thing(s), kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, align=None, tags=None): @@ -585,7 +591,7 @@ def _column_header(): row.append('') style = "text-align: %s;" % self.fmt.justify row.extend([single_column_table(c, self.fmt.justify, style) for - c in self.columns]) + c in self.columns]) else: if self.fmt.index: row.append(self.columns.name or '') @@ -629,7 +635,7 @@ def _column_header(): align = self.fmt.justify self.write_tr(col_row, indent, self.indent_delta, header=True, - align=align) + align=align) if self.fmt.has_index_names: row = [x if x is not None else '' @@ -750,7 +756,7 @@ def grouper(x): return result -#from collections import namedtuple +# from collections import namedtuple # ExcelCell = namedtuple("ExcelCell", # 'row, col, val, style, mergestart, mergeend') @@ -759,7 +765,7 @@ class ExcelCell(object): __slots__ = __fields__ def __init__(self, row, col, val, - style=None, mergestart=None, mergeend=None): + style=None, mergestart=None, mergeend=None): self.row = row self.col = col self.val = val @@ -769,11 +775,11 @@ def __init__(self, row, col, val, header_style = {"font": {"bold": True}, - "borders": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "alignment": {"horizontal": "center"}} + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} class ExcelFormatter(object): @@ -832,7 +838,7 @@ def _format_header_mi(self): return levels = self.columns.format(sparsify=True, adjoin=False, - names=False) + names=False) # level_lenghts = _get_level_lengths(levels) coloffset = 1 if isinstance(self.df.index, MultiIndex): @@ -847,12 +853,12 @@ def _format_header_mi(self): # yield ExcelCell(lnum,coloffset + i + 1, values[i], # header_style, lnum, coloffset + i + records[i]) # else: - # yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) + # yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) # self.rowcounter = lnum - lnum=0 - for i, values in enumerate(zip(*levels)): - v = ".".join(map(com.pprint_thing,values)) + lnum = 0 + for i, values in enumerate(zip(*levels)): + v = ".".join(map(com.pprint_thing, values)) yield ExcelCell(lnum, coloffset + i, v, header_style) self.rowcounter = lnum @@ -870,7 +876,7 @@ def _format_header_regular(self): if has_aliases: if len(self.header) != len(self.columns): raise ValueError(('Writing %d cols but got %d aliases' - % (len(self.columns), len(self.header)))) + % (len(self.columns), len(self.header)))) else: colnames = self.header @@ -907,20 +913,20 @@ def _format_regular_rows(self): self.rowcounter += 1 coloffset = 0 - #output index and index_label? + # output index and index_label? 
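
One user-visible effect of the write_th change above, as a hedged sketch (it
assumes DataFrame.to_html accepts col_space and forwards it to
DataFrameFormatter, mirroring what to_string does):

    from pandas import DataFrame

    df = DataFrame({'A': [1, 2], 'B': [3.5, 4.5]})
    # with col_space > 0 each <th> now carries style="min-width: 80;"
    print df.to_html(col_space=80)
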
if self.index: - #chek aliases - #if list only take first as this is not a MultiIndex + # chek aliases + # if list only take first as this is not a MultiIndex if self.index_label and isinstance(self.index_label, (list, tuple, np.ndarray)): index_label = self.index_label[0] - #if string good to go + # if string good to go elif self.index_label and isinstance(self.index_label, str): index_label = self.index_label else: index_label = self.df.index.names[0] - if index_label and self.header != False: + if index_label and self.header is not False: # add to same level as column names # if isinstance(self.df.columns, MultiIndex): # yield ExcelCell(self.rowcounter, 0, @@ -928,9 +934,9 @@ def _format_regular_rows(self): # self.rowcounter += 1 # else: yield ExcelCell(self.rowcounter - 1, 0, - index_label, header_style) + index_label, header_style) - #write index_values + # write index_values index_values = self.df.index if isinstance(self.df.index, PeriodIndex): index_values = self.df.index.to_timestamp() @@ -950,16 +956,17 @@ def _format_hierarchical_rows(self): self.rowcounter += 1 gcolidx = 0 - #output index and index_label? + # output index and index_label? if self.index: index_labels = self.df.index.names - #check for aliases + # check for aliases if self.index_label and isinstance(self.index_label, (list, tuple, np.ndarray)): index_labels = self.index_label - #if index labels are not empty go ahead and dump - if filter(lambda x: x is not None, index_labels) and self.header != False: + # if index labels are not empty go ahead and dump + if (filter(lambda x: x is not None, index_labels) + and self.header is not False): # if isinstance(self.df.columns, MultiIndex): # self.rowcounter += 1 # else: @@ -981,7 +988,7 @@ def _format_hierarchical_rows(self): yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) def get_formatted_cells(self): - for cell in itertools.chain(self._format_header(),self._format_body() + for cell in itertools.chain(self._format_header(), self._format_body() ): cell.val = self._format_value(cell.val) yield cell @@ -1043,8 +1050,8 @@ def _format_strings(self): else: float_format = self.float_format - formatter = (lambda x: com.pprint_thing(x,escape_chars=('\t','\r','\n'))) \ - if self.formatter is None else self.formatter + formatter = (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))) \ + if self.formatter is None else self.formatter def _format(x): if self.na_rep is not None and lib.checknull(x): @@ -1197,7 +1204,7 @@ def _trim_zeros(str_floats, na_rep='NaN'): def _cond(values): non_na = [x for x in values if x != na_rep] return (len(non_na) > 0 and all([x.endswith('0') for x in non_na]) and - not(any([('e' in x) or ('E' in x) for x in non_na]))) + not(any([('e' in x) or ('E' in x) for x in non_na]))) while _cond(trimmed): trimmed = [x[:-1] if x != na_rep else x for x in trimmed] @@ -1242,7 +1249,7 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, max_columns=None, colheader_justify=None, max_colwidth=None, notebook_repr_html=None, date_dayfirst=None, date_yearfirst=None, - pprint_nest_depth=None,multi_sparse=None, encoding=None): + pprint_nest_depth=None, multi_sparse=None, encoding=None): """ Alter default behavior of DataFrame.toString @@ -1276,7 +1283,7 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, """ import warnings warnings.warn("set_printoptions is deprecated, use set_option instead", - FutureWarning) + FutureWarning) if precision is not None: set_option("display.precision", precision) if 
column_space is not None: @@ -1302,12 +1309,14 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, if encoding is not None: set_option("display.encoding", encoding) + def reset_printoptions(): import warnings warnings.warn("reset_printoptions is deprecated, use reset_option instead", - FutureWarning) + FutureWarning) reset_option("^display\.") + def detect_console_encoding(): """ Try to find the most capable encoding supported by the console. @@ -1321,17 +1330,18 @@ def detect_console_encoding(): except AttributeError: pass - if not encoding or encoding == 'ascii': # try again for something better + if not encoding or encoding == 'ascii': # try again for something better try: encoding = locale.getpreferredencoding() except Exception: pass - if not encoding: # when all else fails. this will usually be "ascii" + if not encoding: # when all else fails. this will usually be "ascii" encoding = sys.getdefaultencoding() return encoding + class EngFormatter(object): """ Formats float values according to engineering format. @@ -1346,19 +1356,19 @@ class EngFormatter(object): -18: "a", -15: "f", -12: "p", - -9: "n", - -6: "u", - -3: "m", - 0: "", - 3: "k", - 6: "M", - 9: "G", - 12: "T", - 15: "P", - 18: "E", - 21: "Z", - 24: "Y" - } + -9: "n", + -6: "u", + -3: "m", + 0: "", + 3: "k", + 6: "M", + 9: "G", + 12: "T", + 15: "P", + 18: "E", + 21: "Z", + 24: "Y" + } def __init__(self, accuracy=None, use_eng_prefix=False): self.accuracy = accuracy @@ -1421,7 +1431,7 @@ def __call__(self, num): formatted = format_str % (mant, prefix) - return formatted #.strip() + return formatted # .strip() def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False): @@ -1441,11 +1451,13 @@ def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False): set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix)) set_option("display.column_space", max(12, accuracy + 9)) + def _put_lines(buf, lines): if any(isinstance(x, unicode) for x in lines): lines = [unicode(x) for x in lines] buf.write('\n'.join(lines)) + def _binify(cols, width): bins = [] curr_width = 0 @@ -1462,12 +1474,12 @@ def _binify(cols, width): arr = np.array([746.03, 0.00, 5620.00, 1592.36]) # arr = np.array([11111111.1, 1.55]) # arr = [314200.0034, 1.4125678] - arr = np.array([ 327763.3119, 345040.9076, 364460.9915, 398226.8688, - 383800.5172, 433442.9262, 539415.0568, 568590.4108, - 599502.4276, 620921.8593, 620898.5294, 552427.1093, - 555221.2193, 519639.7059, 388175.7 , 379199.5854, - 614898.25 , 504833.3333, 560600. , 941214.2857, - 1134250. , 1219550. , 855736.85 , 1042615.4286, - 722621.3043, 698167.1818, 803750. 
]) + arr = np.array([327763.3119, 345040.9076, 364460.9915, 398226.8688, + 383800.5172, 433442.9262, 539415.0568, 568590.4108, + 599502.4276, 620921.8593, 620898.5294, 552427.1093, + 555221.2193, 519639.7059, 388175.7, 379199.5854, + 614898.25, 504833.3333, 560600., 941214.2857, + 1134250., 1219550., 855736.85, 1042615.4286, + 722621.3043, 698167.1818, 803750.]) fmt = FloatArrayFormatter(arr, digits=7) print fmt.get_result() diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1eaa009275808..d81ceaeec8a7a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -181,6 +181,8 @@ class DataConflictError(Exception): #---------------------------------------------------------------------- # Factory helper methods + + def _arith_method(op, name, default_axis='columns'): def na_op(x, y): try: @@ -601,7 +603,7 @@ def _need_info_repr_(self): if max_columns > 0: if (len(self.index) <= max_rows and - (len(self.columns) <= max_columns and expand_repr)): + (len(self.columns) <= max_columns and expand_repr)): return False else: return True @@ -618,7 +620,7 @@ def _need_info_repr_(self): value = buf.getvalue() if (max([len(l) for l in value.split('\n')]) > terminal_width and com.in_interactive_session() - and not expand_repr): + and not expand_repr): return True else: return False @@ -643,7 +645,7 @@ def __bytes__(self): Yields a bytestring in both py2/py3. """ encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding , 'replace') + return self.__unicode__().encode(encoding, 'replace') def __unicode__(self): """ @@ -768,18 +770,18 @@ def __contains__(self, key): __sub__ = _arith_method(operator.sub, '__sub__', default_axis=None) __mul__ = _arith_method(operator.mul, '__mul__', default_axis=None) __truediv__ = _arith_method(operator.truediv, '__truediv__', - default_axis=None) + default_axis=None) __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', - default_axis=None) + default_axis=None) __pow__ = _arith_method(operator.pow, '__pow__', default_axis=None) __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None) __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__', - default_axis=None) + default_axis=None) __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__', - default_axis=None) + default_axis=None) __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__', default_axis=None) @@ -832,7 +834,7 @@ def dot(self, other): if isinstance(other, (Series, DataFrame)): common = self.columns.union(other.index) if (len(common) > len(self.columns) or - len(common) > len(other.index)): + len(common) > len(other.index)): raise ValueError('matrices are not aligned') left = self.reindex(columns=common, copy=False) @@ -887,7 +889,7 @@ def from_dict(cls, data, orient='columns', dtype=None): orient = orient.lower() if orient == 'index': if len(data) > 0: - #TODO speed up Series case + # TODO speed up Series case if isinstance(data.values()[0], (Series, dict)): data = _from_nested_dict(data) else: @@ -918,7 +920,7 @@ def to_dict(self, outtype='dict'): import warnings if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " - "columns will be omitted.",UserWarning) + "columns will be omitted.", UserWarning) if outtype.lower().startswith('d'): return dict((k, v.to_dict()) for k, v in self.iteritems()) elif outtype.lower().startswith('l'): @@ -1025,7 +1027,7 
@@ def from_records(cls, data, index=None, exclude=None, columns=None, result_index = None if index is not None: if (isinstance(index, basestring) or - not hasattr(index, "__iter__")): + not hasattr(index, "__iter__")): i = columns.get_loc(index) exclude.add(index) result_index = Index(arrays[i], name=index) @@ -1221,7 +1223,7 @@ def to_panel(self): # only support this kind for now if (not isinstance(self.index, MultiIndex) or - len(self.index.levels) != 2): + len(self.index.levels) != 2): raise AssertionError('Must have 2-level MultiIndex') if not self.index.is_unique: @@ -1264,8 +1266,8 @@ def to_panel(self): to_wide = deprecate('to_wide', to_panel) def _helper_csv(self, writer, na_rep=None, cols=None, - header=True, index=True, - index_label=None, float_format=None): + header=True, index=True, + index_label=None, float_format=None): if cols is None: cols = self.columns @@ -1406,9 +1408,9 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, csvout = csv.writer(f, lineterminator=line_terminator, delimiter=sep, quoting=quoting) self._helper_csv(csvout, na_rep=na_rep, - float_format=float_format, cols=cols, - header=header, index=index, - index_label=index_label) + float_format=float_format, cols=cols, + header=header, index=index, + index_label=index_label) finally: if close: @@ -2014,7 +2016,7 @@ def _getitem_multilevel(self, key): if len(result.columns) == 1: top = result.columns[0] if ((type(top) == str and top == '') or - (type(top) == tuple and top[0] == '')): + (type(top) == tuple and top[0] == '')): result = result[''] if isinstance(result, Series): result = Series(result, index=self.index, name=key) @@ -2163,7 +2165,7 @@ def _sanitize_column(self, key, value): # special case for now if (com.is_float_dtype(existing_piece) and - com.is_integer_dtype(value)): + com.is_integer_dtype(value)): value = value.astype(np.float64) else: @@ -2523,7 +2525,7 @@ def reindex(self, index=None, columns=None, method=None, level=None, if (index is not None and columns is not None and method is None and level is None - and not self._is_mixed_type): + and not self._is_mixed_type): return self._reindex_multi(index, columns, copy, fill_value) if columns is not None: @@ -3154,8 +3156,9 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False): if inplace: if axis == 1: - self._data = self._data.reindex_items(self._data.items[indexer], - copy=False) + self._data = self._data.reindex_items( + self._data.items[indexer], + copy=False) elif axis == 0: self._data = self._data.take(indexer) @@ -3195,8 +3198,9 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): if inplace: if axis == 1: - self._data = self._data.reindex_items(self._data.items[indexer], - copy=False) + self._data = self._data.reindex_items( + self._data.items[indexer], + copy=False) elif axis == 0: self._data = self._data.take(indexer) @@ -3599,7 +3603,7 @@ def _combine_series_infer(self, other, func, fill_value=None): "by default is deprecated. Please use " "DataFrame. 
to explicitly broadcast arithmetic " "operations along the index"), - FutureWarning) + FutureWarning) return self._combine_match_index(other, func, fill_value) else: return self._combine_match_columns(other, func, fill_value) @@ -3674,7 +3678,7 @@ def combine(self, other, func, fill_value=None): result : DataFrame """ - other_idxlen = len(other.index) # save for compare + other_idxlen = len(other.index) # save for compare this, other = self.align(other, copy=False) new_index = this.index @@ -3735,7 +3739,7 @@ def combine_first(self, other): return self.combine(other, combiner) def update(self, other, join='left', overwrite=True, filter_func=None, - raise_conflict=False): + raise_conflict=False): """ Modify DataFrame in place using non-NA values from passed DataFrame. Aligns on indices @@ -4187,7 +4191,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): result = self._constructor(data=results, index=index) result.rename(columns=dict(zip(range(len(res_index)), res_index)), - inplace=True) + inplace=True) if axis == 1: result = result.T @@ -4235,7 +4239,7 @@ def applymap(self, func): ------- applied : DataFrame """ - + # if we have a dtype == 'M8[ns]', provide boxed values def infer(x): if x.dtype == 'M8[ns]': @@ -4547,7 +4551,7 @@ def describe(self, percentile_width=50): if len(numdata.columns) == 0: return DataFrame(dict((k, v.describe()) for k, v in self.iteritems()), - columns=self.columns) + columns=self.columns) lb = .5 * (1. - percentile_width / 100.) ub = 1. - lb @@ -4781,7 +4785,7 @@ def mad(self, axis=0, skipna=True, level=None): @Substitution(name='variance', shortname='var', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc + - """ + """ Normalized by N-1 (unbiased estimator). """) def var(self, axis=0, skipna=True, level=None, ddof=1): @@ -4794,7 +4798,7 @@ def var(self, axis=0, skipna=True, level=None, ddof=1): @Substitution(name='standard deviation', shortname='std', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc + - """ + """ Normalized by N-1 (unbiased estimator). 
""") def std(self, axis=0, skipna=True, level=None, ddof=1): @@ -4932,7 +4936,7 @@ def _get_numeric_data(self): return DataFrame(num_data, copy=False) else: if (self.values.dtype != np.object_ and - not issubclass(self.values.dtype.type, np.datetime64)): + not issubclass(self.values.dtype.type, np.datetime64)): return self else: return self.ix[:, []] @@ -5351,7 +5355,7 @@ def extract_index(data): if have_series: if lengths[0] != len(index): msg = ('array length %d does not match index length %d' - % (lengths[0], len(index))) + % (lengths[0], len(index))) raise ValueError(msg) else: index = Index(np.arange(lengths[0])) @@ -5414,7 +5418,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): return arrays, columns if len(data) == 0: - return [], [] # columns if columns is not None else [] + return [], [] # columns if columns is not None else [] if isinstance(data[0], (list, tuple)): return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) @@ -5557,15 +5561,17 @@ def _homogenize(data, index, dtype=None): return homogenized + def _from_nested_dict(data): # TODO: this should be seriously cythonized new_data = OrderedDict() for index, s in data.iteritems(): for col, v in s.iteritems(): - new_data[col]= new_data.get(col,OrderedDict()) + new_data[col] = new_data.get(col, OrderedDict()) new_data[col][index] = v return new_data + def _put_str(s, space): return ('%s' % s)[:space].ljust(space) @@ -5577,8 +5583,8 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrame) def complete_dataframe(obj, prev_completions): - return prev_completions + [c for c in obj.columns \ - if isinstance(c, basestring) and py3compat.isidentifier(c)] + return prev_completions + [c for c in obj.columns + if isinstance(c, basestring) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 23b65d4d674a4..54d6c24051707 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -39,6 +39,7 @@ aggregated : DataFrame """ + class GroupByError(Exception): pass @@ -563,7 +564,6 @@ def _get_splitter(self, data, axis=0, keep_internal=True): return get_splitter(data, comp_ids, ngroups, axis=axis, keep_internal=keep_internal) - def _get_group_keys(self): if len(self.groupings) == 1: return self.levels[0] @@ -777,7 +777,7 @@ def aggregate(self, values, how, axis=0): (counts > 0).view(np.uint8)) else: result = lib.row_bool_subset_object(result, - (counts > 0).view(np.uint8)) + (counts > 0).view(np.uint8)) else: result = result[counts > 0] @@ -962,7 +962,6 @@ def get_iterator(self, data, axis=0): inds = range(edge, n) yield self.binlabels[-1], data.take(inds, axis=axis) - def apply(self, f, data, axis=0, keep_internal=False): result_keys = [] result_values = [] @@ -1228,7 +1227,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): if (not any_callable and not all_in_columns and not any_arraylike and match_axis_length - and level is None): + and level is None): keys = [com._asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): @@ -1759,9 +1758,9 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): if isinstance(values[0], np.ndarray): if (isinstance(values[0], Series) and - not _all_indexes_same([x.index for x in values])): + not _all_indexes_same([x.index for x in values])): return self._concat_objects(keys, values, - not_indexed_same=not_indexed_same) + not_indexed_same=not_indexed_same) if self.axis 
== 0: stacked_values = np.vstack([np.asarray(x) @@ -2118,6 +2117,7 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata, slice_obj): return sdata._get_values(slice_obj) + class FrameSplitter(DataSplitter): def __init__(self, data, labels, ngroups, axis=0, keep_internal=False): @@ -2141,7 +2141,8 @@ def _chop(self, sdata, slice_obj): if self.axis == 0: return sdata[slice_obj] else: - return sdata._slice(slice_obj, axis=1) # ix[:, slice_obj] + return sdata._slice(slice_obj, axis=1) # ix[:, slice_obj] + class NDFrameSplitter(DataSplitter): @@ -2201,6 +2202,8 @@ def get_group_index(label_list, shape): return group_index _INT64_MAX = np.iinfo(np.int64).max + + def _int64_overflow_possible(shape): the_prod = 1L for x in shape: @@ -2294,7 +2297,6 @@ def get_key(self, comp_id): for table, level in izip(self.tables, self.levels)) - def _get_indices_dict(label_list, keys): shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape) @@ -2369,6 +2371,7 @@ def _reorder_by_uniques(uniques, labels): np.median: 'median' } + def _is_numeric_dtype(dt): typ = dt.type return (issubclass(typ, (np.number, np.bool_)) @@ -2413,7 +2416,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrameGroupBy) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.obj.columns - if isinstance(c, basestring) and py3compat.isidentifier(c)] + if isinstance(c, basestring) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/index.py b/pandas/core/index.py index 8ac5f6feb9d82..7d9bc7eccff2d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -121,7 +121,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None): return Int64Index(subarr.astype('i8'), name=name) elif inferred != 'string': if (inferred.startswith('datetime') or - tslib.is_timestamp_array(subarr)): + tslib.is_timestamp_array(subarr)): from pandas.tseries.index import DatetimeIndex return DatetimeIndex(subarr, copy=copy, name=name) @@ -162,7 +162,7 @@ def __bytes__(self): Yields a bytestring in both py2/py3. 
""" encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding , 'replace') + return self.__unicode__().encode(encoding, 'replace') def __unicode__(self): """ @@ -175,7 +175,7 @@ def __unicode__(self): else: data = self - prepr = com.pprint_thing(data, escape_chars=('\t','\r','\n')) + prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n')) return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype) def __repr__(self): @@ -426,7 +426,7 @@ def format(self, name=False, formatter=None): header = [] if name: header.append(com.pprint_thing(self.name, - escape_chars=('\t','\r','\n')) + escape_chars=('\t', '\r', '\n')) if self.name is not None else '') if formatter is not None: @@ -447,7 +447,7 @@ def format(self, name=False, formatter=None): values = lib.maybe_convert_objects(values, safe=1) if values.dtype == np.object_: - result = [com.pprint_thing(x,escape_chars=('\t','\r','\n')) + result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) for x in values] else: result = _trim_front(format_array(values, None, justify='left')) @@ -1300,7 +1300,8 @@ class MultiIndex(Index): def __new__(cls, levels=None, labels=None, sortorder=None, names=None): if len(levels) != len(labels): - raise AssertionError('Length of levels and labels must be the same') + raise AssertionError( + 'Length of levels and labels must be the same') if len(levels) == 0: raise Exception('Must pass non-zero number of levels/labels') @@ -1383,7 +1384,7 @@ def __bytes__(self): Yields a bytestring in both py2/py3. """ encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding , 'replace') + return self.__unicode__().encode(encoding, 'replace') def __unicode__(self): """ @@ -1402,7 +1403,7 @@ def __unicode__(self): else: values = self.values - summary = com.pprint_thing(values, escape_chars=('\t','\r','\n')) + summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n')) np.set_printoptions(threshold=options['threshold']) @@ -1557,7 +1558,7 @@ def get_level_values(self, level): values : ndarray """ num = self._get_level_number(level) - unique_vals = self.levels[num] # .values + unique_vals = self.levels[num] # .values labels = self.labels[num] return unique_vals.take(labels) @@ -1572,7 +1573,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, formatted = lev.take(lab).format(formatter=formatter) else: # weird all NA case - formatted = [com.pprint_thing(x,escape_chars=('\t','\r','\n')) + formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) for x in com.take_1d(lev.values, lab)] stringified_levels.append(formatted) @@ -1581,7 +1582,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, level = [] if names: - level.append(com.pprint_thing(name,escape_chars=('\t','\r','\n')) + level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n')) if name is not None else '') level.extend(np.array(lev, dtype=object)) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 53eb18c12f172..a2aca3be39811 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,9 +10,11 @@ # "null slice" _NS = slice(None, None) + class IndexingError(Exception): pass + class _NDFrameIndexer(object): def __init__(self, obj): @@ -36,7 +38,7 @@ def __getitem__(self, key): def _get_label(self, label, axis=0): # ueber-hack if (isinstance(label, tuple) and - isinstance(label[axis], slice)): + isinstance(label[axis], slice)): raise IndexingError('no slices here') @@ -111,7 +113,8 @@ def 
_setitem_with_indexer(self, indexer, value): data = self.obj[item] values = data.values if np.prod(values.shape): - value = com._possibly_cast_to_datetime(value,getattr(data,'dtype',None)) + value = com._possibly_cast_to_datetime( + value, getattr(data, 'dtype', None)) values[plane_indexer] = value except ValueError: for item, v in zip(item_labels[het_idx], value): @@ -251,7 +254,7 @@ def _multi_take(self, tup): elif isinstance(self.obj, Panel4D): conv = [self._convert_for_reindex(x, axis=i) for i, x in enumerate(tup)] - return self.obj.reindex(labels=tup[0],items=tup[1], major=tup[2], minor=tup[3]) + return self.obj.reindex(labels=tup[0], items=tup[1], major=tup[2], minor=tup[3]) elif isinstance(self.obj, Panel): conv = [self._convert_for_reindex(x, axis=i) for i, x in enumerate(tup)] @@ -312,7 +315,7 @@ def _getitem_lowerdim(self, tup): # unfortunately need an odious kludge here because of # DataFrame transposing convention if (isinstance(section, DataFrame) and i > 0 - and len(new_key) == 2): + and len(new_key) == 2): a, b = new_key new_key = b, a @@ -399,7 +402,7 @@ def _reindex(keys, level=None): # this is not the most robust, but... if (isinstance(labels, MultiIndex) and - not isinstance(keyarr[0], tuple)): + not isinstance(keyarr[0], tuple)): level = 0 else: level = None @@ -500,7 +503,7 @@ def _convert_to_indexer(self, obj, axis=0): # this is not the most robust, but... if (isinstance(labels, MultiIndex) and - not isinstance(objarr[0], tuple)): + not isinstance(objarr[0], tuple)): level = 0 _, indexer = labels.reindex(objarr, level=level) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 57844656bf113..e3031b58ff286 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -11,6 +11,7 @@ from pandas.util import py3compat + class Block(object): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas @@ -64,8 +65,8 @@ def set_ref_items(self, ref_items, maybe_rename=True): def __repr__(self): shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ - result = '%s: %s, %s, dtype %s' % (name, com.pprint_thing(self.items) - , shape, self.dtype) + result = '%s: %s, %s, dtype %s' % ( + name, com.pprint_thing(self.items), shape, self.dtype) if py3compat.PY3: return unicode(result) return com.console_encode(result) @@ -194,12 +195,12 @@ def split_block_at(self, item): loc = self.items.get_loc(item) if type(loc) == slice or type(loc) == int: - mask = [True]*len(self) + mask = [True] * len(self) mask[loc] = False - else: # already a mask, inverted + else: # already a mask, inverted mask = -loc - for s,e in com.split_ranges(mask): + for s, e in com.split_ranges(mask): yield make_block(self.values[s:e], self.items[s:e].copy(), self.ref_items) @@ -270,7 +271,7 @@ def interpolate(self, method='pad', axis=0, inplace=False, if missing is None: mask = None - else: # todo create faster fill func without masking + else: # todo create faster fill func without masking mask = _mask_missing(transf(values), missing) if method == 'pad': @@ -323,7 +324,7 @@ def _can_hold_element(self, element): def _try_cast(self, element): try: return float(element) - except: # pragma: no cover + except: # pragma: no cover return element def should_store(self, value): @@ -341,7 +342,7 @@ def _can_hold_element(self, element): def _try_cast(self, element): try: return complex(element) - except: # pragma: no cover + except: # pragma: no cover return element def should_store(self, value): @@ -357,7 +358,7 @@ def _can_hold_element(self, 
element): def _try_cast(self, element): try: return int(element) - except: # pragma: no cover + except: # pragma: no cover return element def should_store(self, value): @@ -373,7 +374,7 @@ def _can_hold_element(self, element): def _try_cast(self, element): try: return bool(element) - except: # pragma: no cover + except: # pragma: no cover return element def should_store(self, value): @@ -396,6 +397,7 @@ def should_store(self, value): _NS_DTYPE = np.dtype('M8[ns]') + class DatetimeBlock(Block): _can_hold_na = True @@ -467,11 +469,12 @@ def make_block(values, items, ref_items): inferred_type = lib.infer_dtype(flat) if inferred_type == 'datetime': - # we have an object array that has been inferred as datetime, so convert it + # we have an object array that has been inferred as datetime, so + # convert it try: values = tslib.array_to_datetime(flat).reshape(values.shape) klass = DatetimeBlock - except: # it already object, so leave it + except: # it already object, so leave it pass if klass is None: @@ -511,7 +514,6 @@ def __init__(self, blocks, axes, do_integrity_check=True): 'equal number of axes (%d)') % (block.values.ndim, ndim)) - if do_integrity_check: self._verify_integrity() @@ -674,12 +676,12 @@ def _get_clean_block_types(self, type_list): except TypeError: type_list = (type_list,) - type_map = {int : IntBlock, float : FloatBlock, - complex : ComplexBlock, - np.datetime64 : DatetimeBlock, - datetime : DatetimeBlock, - bool : BoolBlock, - object : ObjectBlock} + type_map = {int: IntBlock, float: FloatBlock, + complex: ComplexBlock, + np.datetime64: DatetimeBlock, + datetime: DatetimeBlock, + bool: BoolBlock, + object: ObjectBlock} type_list = tuple([type_map.get(t, t) for t in type_list]) return type_list @@ -890,14 +892,14 @@ def iget(self, i): # ugh try: inds, = (self.items == item).nonzero() - except AttributeError: #MultiIndex + except AttributeError: # MultiIndex inds, = self.items.map(lambda x: x == item).nonzero() _, block = self._find_block(item) try: binds, = (block.items == item).nonzero() - except AttributeError: #MultiIndex + except AttributeError: # MultiIndex binds, = block.items.map(lambda x: x == item).nonzero() for j, (k, b) in enumerate(zip(inds, binds)): @@ -925,8 +927,8 @@ def delete(self, item): loc = self.items.get_loc(item) self._delete_from_block(i, item) - if com._is_bool_indexer(loc): # dupe keys may return mask - loc = [i for i,v in enumerate(loc) if v] + if com._is_bool_indexer(loc): # dupe keys may return mask + loc = [i for i, v in enumerate(loc) if v] new_items = self.items.delete(loc) @@ -960,7 +962,8 @@ def _set_item(item, arr): else: subset = self.items[loc] if len(value) != len(subset): - raise AssertionError('Number of items to set did not match') + raise AssertionError( + 'Number of items to set did not match') for i, (item, arr) in enumerate(zip(subset, value)): _set_item(item, arr[None, :]) except KeyError: @@ -1005,7 +1008,7 @@ def _add_new_block(self, item, value, loc=None): # hm, elaborate hack? 
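
For orientation in the internals hunks above: make_block inspects
values.dtype to choose a Block subclass, and with this patch an object array
that infers as datetime is converted to a DatetimeBlock rather than left as
object. A minimal sketch, assuming make_block is importable from
pandas.core.internals as reformatted here:

    import numpy as np
    from pandas import Index
    from pandas.core.internals import make_block

    items = Index(['a'])
    # float64 values select FloatBlock; an uninferable object array
    # would fall through to ObjectBlock
    blk = make_block(np.array([[1., 2., 3.]]), items, items)
    print type(blk).__name__   # -> FloatBlock
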
if loc is None: loc = self.items.get_loc(item) - new_block = make_block(value, self.items[loc:loc+1].copy(), + new_block = make_block(value, self.items[loc:loc + 1].copy(), self.items) self.blocks.append(new_block) @@ -1311,6 +1314,7 @@ def item_dtypes(self): raise AssertionError('Some items were not in any block') return result + def form_blocks(arrays, names, axes): # pre-filter out items if we passed it items = axes[0] @@ -1344,7 +1348,7 @@ def form_blocks(arrays, names, axes): elif issubclass(v.dtype.type, np.integer): if v.dtype == np.uint64: # HACK #2355 definite overflow - if (v > 2**63 - 1).any(): + if (v > 2 ** 63 - 1).any(): object_items.append((k, v)) continue int_items.append((k, v)) @@ -1392,14 +1396,16 @@ def form_blocks(arrays, names, axes): return blocks + def _simple_blockify(tuples, ref_items, dtype): block_items, values = _stack_arrays(tuples, ref_items, dtype) # CHECK DTYPE? - if values.dtype != dtype: # pragma: no cover + if values.dtype != dtype: # pragma: no cover values = values.astype(dtype) return make_block(values, block_items, ref_items) + def _stack_arrays(tuples, ref_items, dtype): from pandas.core.series import Series @@ -1432,6 +1438,7 @@ def _shape_compat(x): return items, stacked + def _blocks_to_series_dict(blocks, index=None): from pandas.core.series import Series @@ -1442,6 +1449,7 @@ def _blocks_to_series_dict(blocks, index=None): series_dict[item] = Series(vec, index=index, name=item) return series_dict + def _interleaved_dtype(blocks): from collections import defaultdict counts = defaultdict(lambda: 0) @@ -1458,7 +1466,7 @@ def _interleaved_dtype(blocks): if (have_object or (have_bool and have_numeric) or - (have_numeric and have_dt64)): + (have_numeric and have_dt64)): return np.dtype(object) elif have_bool: return np.dtype(bool) @@ -1471,6 +1479,7 @@ def _interleaved_dtype(blocks): else: return np.dtype('f8') + def _consolidate(blocks, items): """ Merge blocks having same dtype @@ -1499,6 +1508,7 @@ def _merge_blocks(blocks, items): new_block = make_block(new_values, new_items, items) return new_block.reindex_items_from(items) + def _vstack(to_stack): if all(x.dtype == _NS_DTYPE for x in to_stack): # work around NumPy 1.6 bug diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ba0dc91f4aa6f..1315fc3ce2b76 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -32,7 +32,8 @@ def f(values, axis=None, skipna=True, **kwds): if values.ndim == 1: return 0 else: - result_shape = values.shape[:axis] + values.shape[axis + 1:] + result_shape = values.shape[: + axis] + values.shape[axis + 1:] result = np.empty(result_shape) result.fill(0) return result @@ -51,6 +52,7 @@ def f(values, axis=None, skipna=True, **kwds): return f + def _bn_ok_dtype(dt): # Bottleneck chokes on datetime64 return dt != np.object_ and not issubclass(dt.type, np.datetime64) @@ -167,7 +169,7 @@ def _nanmin(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ - and sys.version_info[0] >= 3): # pragma: no cover + and sys.version_info[0] >= 3): # pragma: no cover import __builtin__ if values.ndim > 1: apply_ax = axis if axis is not None else 0 @@ -176,7 +178,7 @@ def _nanmin(values, axis=None, skipna=True): result = __builtin__.min(values) else: if ((axis is not None and values.shape[axis] == 0) - or values.size == 0): + or values.size == 0): result = com.ensure_float(values.sum(axis)) result.fill(np.nan) else: @@ -205,7 +207,7 @@ def _nanmax(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if 
(values.dtype == np.object_ - and sys.version_info[0] >= 3): # pragma: no cover + and sys.version_info[0] >= 3): # pragma: no cover import __builtin__ if values.ndim > 1: @@ -215,7 +217,7 @@ def _nanmax(values, axis=None, skipna=True): result = __builtin__.max(values) else: if ((axis is not None and values.shape[axis] == 0) - or values.size == 0): + or values.size == 0): result = com.ensure_float(values.sum(axis)) result.fill(np.nan) else: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 15bdc56690039..6b867f9a643db 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -97,7 +97,7 @@ def _arith_method(func, name): def f(self, other): if not np.isscalar(other): raise ValueError('Simple arithmetic with %s can only be ' - 'done with scalar values' % self._constructor.__name__) + 'done with scalar values' % self._constructor.__name__) return self._combine(other, func) f.__name__ = name @@ -116,7 +116,7 @@ def na_op(x, y): yrav = y.ravel() mask = notnull(xrav) & notnull(yrav) result[mask] = func(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) + np.array(list(yrav[mask]))) else: mask = notnull(xrav) result[mask] = func(np.array(list(xrav[mask])), y) @@ -134,34 +134,35 @@ def f(self, other): if isinstance(other, self._constructor): return self._compare_constructor(other, func) elif isinstance(other, (self._constructor_sliced, DataFrame, Series)): - raise Exception("input needs alignment for this object [%s]" % self._constructor) + raise Exception("input needs alignment for this object [%s]" % + self._constructor) else: return self._combine_const(other, na_op) - f.__name__ = name return f + class Panel(NDFrame): - _AXIS_ORDERS = ['items','major_axis','minor_axis'] - _AXIS_NUMBERS = dict([ (a,i) for i, a in enumerate(_AXIS_ORDERS) ]) - _AXIS_ALIASES = { - 'major' : 'major_axis', - 'minor' : 'minor_axis' + _AXIS_ORDERS = ['items', 'major_axis', 'minor_axis'] + _AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(_AXIS_ORDERS)]) + _AXIS_ALIASES = { + 'major': 'major_axis', + 'minor': 'minor_axis' } - _AXIS_NAMES = dict([ (i,a) for i, a in enumerate(_AXIS_ORDERS) ]) + _AXIS_NAMES = dict([(i, a) for i, a in enumerate(_AXIS_ORDERS)]) _AXIS_SLICEMAP = { - 'major_axis' : 'index', - 'minor_axis' : 'columns' - } - _AXIS_LEN = len(_AXIS_ORDERS) + 'major_axis': 'index', + 'minor_axis': 'columns' + } + _AXIS_LEN = len(_AXIS_ORDERS) # major _default_stat_axis = 1 # info axis - _het_axis = 0 + _het_axis = 0 _info_axis = _AXIS_ORDERS[_het_axis] items = lib.AxisProperty(0) @@ -175,22 +176,23 @@ def _constructor(self): # return the type of the slice constructor _constructor_sliced = DataFrame - def _construct_axes_dict(self, axes = None, **kwargs): + def _construct_axes_dict(self, axes=None, **kwargs): """ return an axes dictionary for myself """ - d = dict([ (a,getattr(self,a)) for a in (axes or self._AXIS_ORDERS) ]) + d = dict([(a, getattr(self, a)) for a in (axes or self._AXIS_ORDERS)]) d.update(kwargs) return d @staticmethod def _construct_axes_dict_from(self, axes, **kwargs): """ return an axes dictionary for the passed axes """ - d = dict([ (a,ax) for a,ax in zip(self._AXIS_ORDERS,axes) ]) + d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)]) d.update(kwargs) return d - def _construct_axes_dict_for_slice(self, axes = None, **kwargs): + def _construct_axes_dict_for_slice(self, axes=None, **kwargs): """ return an axes dictionary for myself """ - d = dict([ (self._AXIS_SLICEMAP[a],getattr(self,a)) for a in (axes or self._AXIS_ORDERS) ]) + d = 
dict([(self._AXIS_SLICEMAP[a], getattr(self, a)) + for a in (axes or self._AXIS_ORDERS)]) d.update(kwargs) return d @@ -231,15 +233,16 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, copy : boolean, default False Copy data from inputs. Only affects DataFrame / 2d ndarray input """ - self._init_data( data=data, items=items, major_axis=major_axis, minor_axis=minor_axis, - copy=copy, dtype=dtype) + self._init_data( + data=data, items=items, major_axis=major_axis, minor_axis=minor_axis, + copy=copy, dtype=dtype) def _init_data(self, data, copy, dtype, **kwargs): """ generate ND initialization; axes are passed as required objects to __init__ """ if data is None: data = {} - passed_axes = [ kwargs.get(a) for a in self._AXIS_ORDERS ] + passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS] axes = None if isinstance(data, BlockManager): if any(x is not None for x in passed_axes): @@ -265,7 +268,7 @@ def _from_axes(cls, data, axes): if isinstance(data, BlockManager): return cls(data) else: - d = cls._construct_axes_dict_from(cls, axes, copy = False) + d = cls._construct_axes_dict_from(cls, axes, copy=False) return cls(data, **d) def _init_dict(self, data, axes, dtype=None): @@ -289,7 +292,7 @@ def _init_dict(self, data, axes, dtype=None): # shallow copy arrays = [] - haxis_shape = [ len(a) for a in raxes ] + haxis_shape = [len(a) for a in raxes] for h in haxis: v = values = data.get(h) if v is None: @@ -304,7 +307,7 @@ def _init_dict(self, data, axes, dtype=None): values = v.values arrays.append(values) - return self._init_arrays(arrays, haxis, [ haxis ] + raxes) + return self._init_arrays(arrays, haxis, [haxis] + raxes) def _init_arrays(self, arrays, arr_names, axes): # segregates dtypes and forms blocks matching to columns @@ -314,7 +317,7 @@ def _init_arrays(self, arrays, arr_names, axes): @property def shape(self): - return [ len(getattr(self,a)) for a in self._AXIS_ORDERS ] + return [len(getattr(self, a)) for a in self._AXIS_ORDERS] @classmethod def from_dict(cls, data, intersect=False, orient='items', dtype=None): @@ -356,18 +359,19 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): return cls(**d) def __getitem__(self, key): - if isinstance(getattr(self,self._info_axis), MultiIndex): + if isinstance(getattr(self, self._info_axis), MultiIndex): return self._getitem_multilevel(key) return super(Panel, self).__getitem__(key) def _getitem_multilevel(self, key): - info = getattr(self,self._info_axis) - loc = info.get_loc(key) + info = getattr(self, self._info_axis) + loc = info.get_loc(key) if isinstance(loc, (slice, np.ndarray)): - new_index = info[loc] + new_index = info[loc] result_index = _maybe_droplevels(new_index, key) - slices = [loc] + [slice(None) for x in range(self._AXIS_LEN-1)] - new_values = self.values[slices] + slices = [loc] + [slice(None) for x in range( + self._AXIS_LEN - 1)] + new_values = self.values[slices] d = self._construct_axes_dict(self._AXIS_ORDERS[1:]) d[self._info_axis] = result_index @@ -405,14 +409,14 @@ def __array__(self, dtype=None): return self.values def __array_wrap__(self, result): - d = self._construct_axes_dict(self._AXIS_ORDERS, copy = False) + d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) return self._constructor(result, **d) #---------------------------------------------------------------------- # Comparison methods def _indexed_same(self, other): - return all([ getattr(self,a).equals(getattr(other,a)) for a in self._AXIS_ORDERS ]) + return all([getattr(self, a).equals(getattr(other, 
a)) for a in self._AXIS_ORDERS]) def _compare_constructor(self, other, func): if not self._indexed_same(other): @@ -420,10 +424,10 @@ def _compare_constructor(self, other, func): 'same type objects') new_data = {} - for col in getattr(self,self._info_axis): + for col in getattr(self, self._info_axis): new_data[col] = func(self[col], other[col]) - d = self._construct_axes_dict(copy = False) + d = self._construct_axes_dict(copy=False) return self._constructor(data=new_data, **d) # boolean operators @@ -475,7 +479,7 @@ def __bytes__(self): Yields a bytestring in both py2/py3. """ encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding , 'replace') + return self.__unicode__().encode(encoding, 'replace') def __unicode__(self): """ @@ -487,17 +491,18 @@ def __unicode__(self): class_name = str(self.__class__) shape = self.shape - dims = u'Dimensions: %s' % ' x '.join([ "%d (%s)" % (s, a) for a,s in zip(self._AXIS_ORDERS,shape) ]) + dims = u'Dimensions: %s' % ' x '.join( + ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)]) def axis_pretty(a): - v = getattr(self,a) + v = getattr(self, a) if len(v) > 0: - return u'%s axis: %s to %s' % (a.capitalize(),com.pprint_thing(v[0]),com.pprint_thing(v[-1])) + return u'%s axis: %s to %s' % (a.capitalize(), com.pprint_thing(v[0]), com.pprint_thing(v[-1])) else: return u'%s axis: None' % a.capitalize() - - output = '\n'.join([class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) + output = '\n'.join( + [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) return output def __repr__(self): @@ -509,10 +514,10 @@ def __repr__(self): return str(self) def __iter__(self): - return iter(getattr(self,self._info_axis)) + return iter(getattr(self, self._info_axis)) def iteritems(self): - for h in getattr(self,self._info_axis): + for h in getattr(self, self._info_axis): yield h, self[h] # Name that won't get automatically converted to items by 2to3. items is @@ -546,7 +551,7 @@ def ix(self): return self._ix def _wrap_array(self, arr, axes, copy=False): - d = self._construct_axes_dict_from(self, axes, copy = copy) + d = self._construct_axes_dict_from(self, axes, copy=copy) return self._constructor(arr, **d) fromDict = from_dict @@ -592,7 +597,7 @@ def to_excel(self, path, na_rep=''): # TODO: needed? def keys(self): - return list(getattr(self,self._info_axis)) + return list(getattr(self, self._info_axis)) def _get_values(self): self._consolidate_inplace() @@ -642,19 +647,20 @@ def set_value(self, *args): otherwise a new object """ # require an arg for each axis and the value - assert(len(args) == self._AXIS_LEN+1) + assert(len(args) == self._AXIS_LEN + 1) try: frame = self._get_item_cache(args[0]) frame.set_value(*args[1:]) return self except KeyError: - axes = self._expand_axes(args) - d = self._construct_axes_dict_from(self, axes, copy = False) + axes = self._expand_axes(args) + d = self._construct_axes_dict_from(self, axes, copy=False) result = self.reindex(**d) likely_dtype = com._infer_dtype(args[-1]) - made_bigger = not np.array_equal(axes[0], getattr(self,self._info_axis)) + made_bigger = not np.array_equal( + axes[0], getattr(self, self._info_axis)) # how to make this logic simpler? if made_bigger: com._possibly_cast_item(result, args[0], likely_dtype) @@ -668,7 +674,7 @@ def _box_item_values(self, key, values): def __getattr__(self, name): """After regular attribute access, try looking up the name of an item. 
This allows simpler access to items for interactive use.""" - if name in getattr(self,self._info_axis): + if name in getattr(self, self._info_axis): return self[name] raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name)) @@ -680,7 +686,8 @@ def _slice(self, slobj, axis=0): def __setitem__(self, key, value): shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): - value = value.reindex(**self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])) + value = value.reindex( + **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])) mat = value.values elif isinstance(value, np.ndarray): assert(value.shape == shape[1:]) @@ -784,7 +791,7 @@ def reindex(self, major=None, minor=None, method=None, major = _mut_exclusive(major, major_axis) minor = _mut_exclusive(minor, minor_axis) - al = self._AXIS_LEN + al = self._AXIS_LEN # only allowing multi-index on Panel (and not > dims) if (method is None and not self._is_mixed_type and al <= 3): @@ -796,12 +803,12 @@ def reindex(self, major=None, minor=None, method=None, pass if major is not None: - result = result._reindex_axis(major, method, al-2, copy) + result = result._reindex_axis(major, method, al - 2, copy) if minor is not None: - result = result._reindex_axis(minor, method, al-1, copy) + result = result._reindex_axis(minor, method, al - 1, copy) - for i, a in enumerate(self._AXIS_ORDERS[0:al-2]): + for i, a in enumerate(self._AXIS_ORDERS[0:al - 2]): a = kwargs.get(a) if a is not None: result = result._reindex_axis(a, method, i, copy) @@ -880,7 +887,7 @@ def reindex_like(self, other, method=None): ------- reindexed : Panel """ - d = other._construct_axes_dict(method = method) + d = other._construct_axes_dict(method=method) return self.reindex(**d) def dropna(self, axis=0, how='any'): @@ -947,7 +954,7 @@ def _combine_frame(self, other, func, axis=0): new_values = new_values.swapaxes(0, 2) return self._constructor(new_values, self.items, self.major_axis, - self.minor_axis) + self.minor_axis) def _combine_panel(self, other, func): items = self.items + other.items @@ -1001,14 +1008,12 @@ def fillna(self, value=None, method=None): new_data = self._data.fillna(value) return self._constructor(new_data) - def ffill(self): return self.fillna(method='ffill') def bfill(self): return self.fillna(method='bfill') - def major_xs(self, key, copy=True): """ Return slice of panel along major axis @@ -1025,7 +1030,7 @@ def major_xs(self, key, copy=True): y : DataFrame index -> minor axis, columns -> items """ - return self.xs(key, axis=self._AXIS_LEN-2, copy=copy) + return self.xs(key, axis=self._AXIS_LEN - 2, copy=copy) def minor_xs(self, key, copy=True): """ @@ -1043,7 +1048,7 @@ def minor_xs(self, key, copy=True): y : DataFrame index -> major axis, columns -> items """ - return self.xs(key, axis=self._AXIS_LEN-1, copy=copy) + return self.xs(key, axis=self._AXIS_LEN - 1, copy=copy) def xs(self, key, axis=1, copy=True): """ @@ -1148,7 +1153,8 @@ def transpose(self, *args, **kwargs): try: kwargs[a] = args.pop(0) except (IndexError): - raise ValueError("not enough arguments specified to transpose!") + raise ValueError( + "not enough arguments specified to transpose!") axes = [self._get_axis_number(kwargs[a]) for a in self._AXIS_ORDERS] @@ -1156,7 +1162,8 @@ def transpose(self, *args, **kwargs): if len(axes) != len(set(axes)): raise ValueError('Must specify %s unique axes' % self._AXIS_LEN) - new_axes = self._construct_axes_dict_from(self, [ self._get_axis(x) for x in axes]) + new_axes = 
self._construct_axes_dict_from( + self, [self._get_axis(x) for x in axes]) new_values = self.values.transpose(tuple(axes)) if kwargs.get('copy') or (len(args) and args[-1]): new_values = new_values.copy() @@ -1266,9 +1273,10 @@ def _wrap_result(self, result, axis): # do we have reduced dimensionalility? if self.ndim == result.ndim: return self._constructor(result, **self._construct_axes_dict()) - elif self.ndim == result.ndim+1: + elif self.ndim == result.ndim + 1: return self._constructor_sliced(result, **self._extract_axes_for_slice(self, axes)) - raise PandasError("invalid _wrap_result [self->%s] [result->%s]" % (self.ndim,result.ndim)) + raise PandasError("invalid _wrap_result [self->%s] [result->%s]" % + (self.ndim, result.ndim)) def count(self, axis='major'): """ @@ -1313,7 +1321,7 @@ def shift(self, lags, axis='major'): vslicer = slice(None, -lags) islicer = slice(lags, None) elif lags == 0: - vslicer = islicer =slice(None) + vslicer = islicer = slice(None) else: vslicer = slice(-lags, None) islicer = slice(None, lags) @@ -1428,8 +1436,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, other = self._constructor(other) axis = self._info_axis - axis_values = getattr(self,axis) - other = other.reindex(**{ axis : axis_values }) + axis_values = getattr(self, axis) + other = other.reindex(**{axis: axis_values}) for frame in axis_values: self[frame].update(other[frame], join, overwrite, filter_func, @@ -1452,12 +1460,12 @@ def _get_join_index(self, other, how): @staticmethod def _extract_axes(self, data, axes, **kwargs): """ return a list of the axis indicies """ - return [ self._extract_axis(self, data, axis=i, **kwargs) for i, a in enumerate(axes) ] + return [self._extract_axis(self, data, axis=i, **kwargs) for i, a in enumerate(axes)] @staticmethod def _extract_axes_for_slice(self, axes): """ return the slice dictionary for these axes """ - return dict([ (self._AXIS_SLICEMAP[i], a) for i, a in zip(self._AXIS_ORDERS[self._AXIS_LEN-len(axes):],axes) ]) + return dict([(self._AXIS_SLICEMAP[i], a) for i, a in zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)]) @staticmethod def _prep_ndarray(self, values, copy=True): @@ -1496,10 +1504,12 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): else: adj_frames[k] = v - axes = self._AXIS_ORDERS[1:] - axes_dict = dict([ (a,ax) for a,ax in zip(axes,self._extract_axes(self, adj_frames, axes, intersect=intersect)) ]) + axes = self._AXIS_ORDERS[1:] + axes_dict = dict([(a, ax) for a, ax in zip(axes, self._extract_axes( + self, adj_frames, axes, intersect=intersect))]) - reindex_dict = dict([ (self._AXIS_SLICEMAP[a],axes_dict[a]) for a in axes ]) + reindex_dict = dict( + [(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes]) reindex_dict['copy'] = False for key, frame in adj_frames.iteritems(): if frame is not None: @@ -1560,7 +1570,7 @@ def _add_aggregate_operations(cls): Parameters ---------- -other : """ + "%s or %s" % (cls._constructor_sliced.__name__,cls.__name__) + """ +other : """ + "%s or %s" % (cls._constructor_sliced.__name__, cls.__name__) + """ axis : {""" + ', '.join(cls._AXIS_ORDERS) + "}" + """ Axis to broadcast over @@ -1571,7 +1581,7 @@ def _add_aggregate_operations(cls): def _panel_arith_method(op, name): @Substitution(op) @Appender(_agg_doc) - def f(self, other, axis = 0): + def f(self, other, axis=0): return self._combine(other, op, axis=axis) f.__name__ = name return f @@ -1584,15 +1594,15 @@ def f(self, other, axis = 0): cls.divide = cls.div = _panel_arith_method(operator.div, 
'divide') except AttributeError: # pragma: no cover # Python 3 - cls.divide = cls.div = _panel_arith_method(operator.truediv, 'divide') - + cls.divide = cls.div = _panel_arith_method( + operator.truediv, 'divide') _agg_doc = """ Return %(desc)s over requested axis Parameters ---------- -axis : {""" + ', '.join(cls._AXIS_ORDERS) + "} or {" + ', '.join([ str(i) for i in range(cls._AXIS_LEN) ]) + """} +axis : {""" + ', '.join(cls._AXIS_ORDERS) + "} or {" + ', '.join([str(i) for i in range(cls._AXIS_LEN)]) + """} skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA @@ -1683,8 +1693,8 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(Panel) def complete_dataframe(obj, prev_completions): - return prev_completions + [c for c in obj.keys() \ - if isinstance(c, basestring) and py3compat.isidentifier(c)] + return prev_completions + [c for c in obj.keys() + if isinstance(c, basestring) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless # we're in IPython (when those modules are loaded anyway). diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index 71e3e7d68e252..94c6941cb24f1 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -4,15 +4,14 @@ from pandas.core.panel import Panel Panel4D = create_nd_panel_factory( - klass_name = 'Panel4D', - axis_orders = [ 'labels','items','major_axis','minor_axis'], - axis_slices = { 'labels' : 'labels', 'items' : 'items', - 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = Panel, - axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, - stat_axis = 2) - + klass_name='Panel4D', + axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], + axis_slices={'labels': 'labels', 'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer=Panel, + axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis=2) def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, @@ -39,5 +38,3 @@ def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, copy=copy, dtype=dtype) Panel4D.__init__ = panel4d_init - - diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index b7d2e29a3c79d..7c816b7beeea6 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -2,7 +2,8 @@ import pandas.lib as lib -def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases = None, stat_axis = 2): + +def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases=None, stat_axis=2): """ manufacture a n-d class: parameters @@ -11,7 +12,7 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a axis_orders : the names of the axes in order (highest to lowest) axis_slices : a dictionary that defines how the axes map to the sliced axis slicer : the class representing a slice of this panel - axis_aliases: a dictionary defining aliases for various axes + axis_aliases: a dictionary defining aliases for various axes default = { major : major_axis, minor : minor_axis } stat_axis : the default statistic axis default = 2 @@ -26,56 +27,57 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a """ # if slicer is a name, get the object - if isinstance(slicer,basestring): + if isinstance(slicer, basestring): import pandas try: - slicer = getattr(pandas,slicer) + slicer = getattr(pandas, slicer) except: raise 
Exception("cannot create this slicer [%s]" % slicer) # build the klass - klass = type(klass_name, (slicer,),{}) + klass = type(klass_name, (slicer,), {}) # add the class variables - klass._AXIS_ORDERS = axis_orders - klass._AXIS_NUMBERS = dict([ (a,i) for i, a in enumerate(axis_orders) ]) - klass._AXIS_ALIASES = axis_aliases or dict() - klass._AXIS_NAMES = dict([ (i,a) for i, a in enumerate(axis_orders) ]) + klass._AXIS_ORDERS = axis_orders + klass._AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(axis_orders)]) + klass._AXIS_ALIASES = axis_aliases or dict() + klass._AXIS_NAMES = dict([(i, a) for i, a in enumerate(axis_orders)]) klass._AXIS_SLICEMAP = axis_slices - klass._AXIS_LEN = len(axis_orders) + klass._AXIS_LEN = len(axis_orders) klass._default_stat_axis = stat_axis - klass._het_axis = 0 - klass._info_axis = axis_orders[klass._het_axis] + klass._het_axis = 0 + klass._info_axis = axis_orders[klass._het_axis] klass._constructor_sliced = slicer # add the axes for i, a in enumerate(axis_orders): - setattr(klass,a,lib.AxisProperty(i)) + setattr(klass, a, lib.AxisProperty(i)) #### define the methods #### def __init__(self, *args, **kwargs): if not (kwargs.get('data') or len(args)): - raise Exception("must supply at least a data argument to [%s]" % klass_name) + raise Exception( + "must supply at least a data argument to [%s]" % klass_name) if 'copy' not in kwargs: kwargs['copy'] = False if 'dtype' not in kwargs: kwargs['dtype'] = None - self._init_data( *args, **kwargs) + self._init_data(*args, **kwargs) klass.__init__ = __init__ def _get_plane_axes(self, axis): - axis = self._get_axis_name(axis) - index = self._AXIS_ORDERS.index(axis) + axis = self._get_axis_name(axis) + index = self._AXIS_ORDERS.index(axis) planes = [] if index: planes.extend(self._AXIS_ORDERS[0:index]) if index != self._AXIS_LEN: - planes.extend(self._AXIS_ORDERS[index+1:]) + planes.extend(self._AXIS_ORDERS[index + 1:]) - return [ getattr(self,p) for p in planes ] + return [getattr(self, p) for p in planes] klass._get_plane_axes = _get_plane_axes def _combine(self, other, func, axis=0): @@ -89,27 +91,26 @@ def _combine_with_constructor(self, other, func): # combine labels to form new axes new_axes = [] for a in self._AXIS_ORDERS: - new_axes.append(getattr(self,a) + getattr(other,a)) + new_axes.append(getattr(self, a) + getattr(other, a)) # reindex: could check that everything's the same size, but forget it - d = dict([ (a,ax) for a,ax in zip(self._AXIS_ORDERS,new_axes) ]) + d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes)]) d['copy'] = False this = self.reindex(**d) other = other.reindex(**d) - + result_values = func(this.values, other.values) return self._constructor(result_values, **d) klass._combine_with_constructor = _combine_with_constructor # set as NonImplemented operations which we don't support - for f in ['to_frame','to_excel','to_sparse','groupby','join','filter','dropna','shift','take']: + for f in ['to_frame', 'to_excel', 'to_sparse', 'groupby', 'join', 'filter', 'dropna', 'shift', 'take']: def func(self, *args, **kwargs): raise NotImplementedError - setattr(klass,f,func) + setattr(klass, f, func) # add the aggregate operations klass._add_aggregate_operations() return klass - diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 86923df06c376..f01ea567850e4 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -156,8 +156,8 @@ def get_new_values(self): # is there a simpler / faster way of doing this? 
for i in xrange(values.shape[1]): - chunk = new_values[:, i * width : (i + 1) * width] - mask_chunk = new_mask[:, i * width : (i + 1) * width] + chunk = new_values[:, i * width: (i + 1) * width] + mask_chunk = new_mask[:, i * width: (i + 1) * width] chunk.flat[self.mask] = self.sorted_values[:, i] mask_chunk.flat[self.mask] = True @@ -394,9 +394,10 @@ def _unstack_frame(obj, level): value_columns=obj.columns) return unstacker.get_result() + def get_compressed_ids(labels, sizes): # no overflow - if _long_prod(sizes) < 2**63: + if _long_prod(sizes) < 2 ** 63: group_index = get_group_index(labels, sizes) comp_index, obs_ids = _compress_group_index(group_index) else: @@ -405,9 +406,9 @@ def get_compressed_ids(labels, sizes): for v in labels: mask |= v < 0 - while _long_prod(sizes) >= 2**63: + while _long_prod(sizes) >= 2 ** 63: i = len(sizes) - while _long_prod(sizes[:i]) >= 2**63: + while _long_prod(sizes[:i]) >= 2 ** 63: i -= 1 rem_index, rem_ids = get_compressed_ids(labels[:i], @@ -419,12 +420,14 @@ def get_compressed_ids(labels, sizes): return comp_index, obs_ids + def _long_prod(vals): result = 1L for x in vals: result *= x return result + def stack(frame, level=-1, dropna=True): """ Convert DataFrame to Series with multi-level Index. Columns become the @@ -793,6 +796,7 @@ def block2d_to_block3d(values, items, shape, major_labels, minor_labels, return make_block(pvalues, items, ref_items) + def block2d_to_blocknd(values, items, shape, labels, ref_items=None): """ pivot to the labels shape """ from pandas.core.internals import make_block @@ -802,7 +806,7 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None): # Create observation selection vector using major and minor # labels, for converting to panel format. - selector = factor_indexer(shape[1:],labels) + selector = factor_indexer(shape[1:], labels) mask = np.zeros(np.prod(shape), dtype=bool) mask.put(selector, True) @@ -822,7 +826,8 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None): return make_block(pvalues, items, ref_items) + def factor_indexer(shape, labels): """ given a tuple of shape and a list of Factor lables, return the expanded label indexer """ - mult = np.array(shape)[::-1].cumprod()[::-1] - return np.sum(np.array(labels).T * np.append(mult,[1]), axis=1).T + mult = np.array(shape)[::-1].cumprod()[::-1] + return np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e0e0571239c3..56b6e06844b86 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -172,7 +172,7 @@ def na_op(x, y): if isinstance(y, np.ndarray): if (x.dtype == np.bool_ and - y.dtype == np.bool_): # pragma: no cover + y.dtype == np.bool_): # pragma: no cover result = op(x, y) # when would this be hit? 
else: x = com._ensure_object(x) @@ -644,7 +644,7 @@ def __setitem__(self, key, value): return except KeyError: if (com.is_integer(key) - and not self.index.inferred_type == 'integer'): + and not self.index.inferred_type == 'integer'): values[key] = value return @@ -768,7 +768,7 @@ def convert_objects(self, convert_dates=True): """ if self.dtype == np.object_: return Series(lib.maybe_convert_objects( - self, convert_datetime=convert_dates), self.index) + self, convert_datetime=convert_dates), self.index) return self def repeat(self, reps): @@ -932,7 +932,6 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): return df.reset_index(level=level, drop=drop) - def __str__(self): """ Return a string representation for a particular DataFrame @@ -953,7 +952,7 @@ def __bytes__(self): Yields a bytestring in both py2/py3. """ encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding , 'replace') + return self.__unicode__().encode(encoding, 'replace') def __unicode__(self): """ @@ -1001,7 +1000,8 @@ def _tidy_repr(self, max_vals=20): return unicode(result) def _repr_footer(self): - namestr = u"Name: %s, " % com.pprint_thing(self.name) if self.name is not None else "" + namestr = u"Name: %s, " % com.pprint_thing( + self.name) if self.name is not None else "" return u'%sLength: %d' % (namestr, len(self)) def to_string(self, buf=None, na_rep='NaN', float_format=None, @@ -1341,7 +1341,7 @@ def max(self, axis=None, out=None, skipna=True, level=None): @Substitution(name='standard deviation', shortname='stdev', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc + - """ + """ Normalized by N-1 (unbiased estimator). """) def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, @@ -1354,7 +1354,7 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, @Substitution(name='variance', shortname='var', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc + - """ + """ Normalized by N-1 (unbiased estimator). 
""") def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, @@ -1632,10 +1632,11 @@ def pretty_name(x): names = ['count'] data = [self.count()] names += ['mean', 'std', 'min', pretty_name(lb), '50%', - pretty_name(ub), 'max'] + pretty_name(ub), 'max'] data += [self.mean(), self.std(), self.min(), - self.quantile(lb), self.median(), self.quantile(ub), - self.max()] + self.quantile( + lb), self.median(), self.quantile(ub), + self.max()] return Series(data, index=names) @@ -1932,7 +1933,7 @@ def sort(self, axis=0, kind='quicksort', order=None): true_base = true_base.base if (true_base is not None and - (true_base.ndim != 1 or true_base.shape != self.shape)): + (true_base.ndim != 1 or true_base.shape != self.shape)): raise Exception('This Series is a view of some other array, to ' 'sort in-place you must create a copy') @@ -2357,7 +2358,7 @@ def reindex(self, index=None, method=None, level=None, fill_value=np.nan, return Series(nan, index=index, name=self.name) new_index, indexer = self.index.reindex(index, method=method, - level=level, limit=limit) + level=level, limit=limit) new_values = com.take_1d(self.values, indexer, fill_value=fill_value) return Series(new_values, index=new_index, name=self.name) @@ -3014,7 +3015,7 @@ def _try_cast(arr): subarr = data.copy() else: if (com.is_datetime64_dtype(data.dtype) and - not com.is_datetime64_dtype(dtype)): + not com.is_datetime64_dtype(dtype)): if dtype == object: ints = np.asarray(data).view('i8') subarr = tslib.ints_to_pydatetime(ints) @@ -3058,7 +3059,7 @@ def _try_cast(arr): subarr = np.empty(len(index), dtype=dtype) else: # need to possibly convert the value here - value = com._possibly_cast_to_datetime(value, dtype) + value = com._possibly_cast_to_datetime(value, dtype) subarr = np.empty(len(index), dtype=dtype) subarr.fill(value) else: @@ -3145,7 +3146,8 @@ def _repr_footer(self): else: freqstr = '' - namestr = "Name: %s, " % str(self.name) if self.name is not None else "" + namestr = "Name: %s, " % str( + self.name) if self.name is not None else "" return '%s%sLength: %d' % (freqstr, namestr, len(self)) def to_timestamp(self, freq=None, how='start', copy=True): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 29553afc65d28..cac9e84412cfa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -247,6 +247,7 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): flags |= re.IGNORECASE regex = re.compile(pat, flags=flags) n = n if n >= 0 else 0 + def f(x): return regex.sub(repl, x, count=n) else: diff --git a/pandas/io/auth.py b/pandas/io/auth.py index 471436cb1b6bf..a44618eba8921 100644 --- a/pandas/io/auth.py +++ b/pandas/io/auth.py @@ -24,12 +24,14 @@ import oauth2client.tools as tools OOB_CALLBACK_URN = oauth.OOB_CALLBACK_URN + class AuthenticationConfigError(ValueError): pass FLOWS = {} FLAGS = gflags.FLAGS -DEFAULT_SECRETS = os.path.join(os.path.dirname(__file__), 'client_secrets.json') +DEFAULT_SECRETS = os.path.join( + os.path.dirname(__file__), 'client_secrets.json') DEFAULT_SCOPE = 'https://www.googleapis.com/auth/analytics.readonly' DEFAULT_TOKEN_FILE = os.path.join(os.path.dirname(__file__), 'analytics.dat') MISSING_CLIENT_MSG = """ @@ -53,6 +55,7 @@ class AuthenticationConfigError(ValueError): # the API without having to login each time. Make sure this file is in # a secure place. + def process_flags(flags=[]): """Uses the command-line flags to set the logging level. @@ -62,14 +65,15 @@ def process_flags(flags=[]): # Let the gflags module process the command-line arguments. 
try: - FLAGS(flags) + FLAGS(flags) except gflags.FlagsError, e: - print '%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS) - sys.exit(1) + print '%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS) + sys.exit(1) # Set the logging according to the command-line flag. logging.getLogger().setLevel(getattr(logging, FLAGS.logging_level)) + def get_flow(secret, scope, redirect): """ Retrieve an authentication flow object based on the given @@ -88,12 +92,14 @@ def get_flow(secret, scope, redirect): FLOWS[key] = flow return flow + def make_token_store(fpath=None): """create token storage from give file name""" if fpath is None: fpath = DEFAULT_TOKEN_FILE return auth_file.Storage(fpath) + def authenticate(flow, storage=None): """ Try to retrieve a valid set of credentials from the token store if possible @@ -115,6 +121,7 @@ def authenticate(flow, storage=None): http = credentials.authorize(http) return http + def init_service(http): """ Use the given http object to build the analytics service object diff --git a/pandas/io/data.py b/pandas/io/data.py index 7fca9a6d7867c..e4457d141e92c 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -18,7 +18,7 @@ def DataReader(name, data_source=None, start=None, end=None, - retry_count=3, pause=0): + retry_count=3, pause=0): """ Imports data from a number of online sources. @@ -55,7 +55,7 @@ def DataReader(name, data_source=None, start=None, end=None, if(data_source == "yahoo"): return get_data_yahoo(name=name, start=start, end=end, - retry_count=retry_count, pause=pause) + retry_count=retry_count, pause=pause) elif(data_source == "fred"): return get_data_fred(name=name, start=start, end=end) elif(data_source == "famafrench"): @@ -80,16 +80,17 @@ def get_quote_yahoo(symbols): Returns a DataFrame """ if not isinstance(symbols, list): - raise TypeError, "symbols must be a list" + raise TypeError("symbols must be a list") # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r', 'time': 't1', 'short_ratio': 's7'} - request = str.join('',codes.values()) # code request string + request = str.join('', codes.values()) # code request string header = codes.keys() data = dict(zip(codes.keys(), [[] for i in range(len(codes))])) - urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (str.join('+', symbols), request) + urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % ( + str.join('+', symbols), request) try: lines = urllib2.urlopen(urlStr).readlines() @@ -132,14 +133,14 @@ def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0): yahoo_URL = 'http://ichart.yahoo.com/table.csv?' 
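# For comparison, the same request URL can be assembled with urlencode
# instead of string concatenation; per the ichart query convention used
# below, a/b/c carry the zero-based start month, start day and start year,
# d/e/f the end date, and g=d selects daily rows. A sketch only
# (_yahoo_chart_url is a hypothetical helper, not part of this module):
from urllib import urlencode    # Python 2, matching the surrounding code

def _yahoo_chart_url(name, start, end):
    params = urlencode([('s', name),
                        ('a', start.month - 1), ('b', start.day),
                        ('c', start.year),
                        ('d', end.month - 1), ('e', end.day),
                        ('f', end.year),
                        ('g', 'd'), ('ignore', '.csv')])
    return 'http://ichart.yahoo.com/table.csv?' + params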
url = yahoo_URL + 's=%s' % name + \ - '&a=%s' % (start.month - 1) + \ - '&b=%s' % start.day + \ - '&c=%s' % start.year + \ - '&d=%s' % (end.month - 1) + \ - '&e=%s' % end.day + \ - '&f=%s' % end.year + \ - '&g=d' + \ - '&ignore=.csv' + '&a=%s' % (start.month - 1) + \ + '&b=%s' % start.day + \ + '&c=%s' % start.year + \ + '&d=%s' % (end.month - 1) + \ + '&e=%s' % end.day + \ + '&f=%s' % end.year + \ + '&g=d' + \ + '&ignore=.csv' for _ in range(retry_count): resp = urllib2.urlopen(url) @@ -178,7 +179,7 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1), fred_URL = "http://research.stlouisfed.org/fred2/series/" url = fred_URL + '%s' % name + \ - '/downloaddata/%s' % name + '.csv' + '/downloaddata/%s' % name + '.csv' data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True, header=None, skiprows=1, names=["DATE", name]) return data.truncate(start, end) @@ -198,15 +199,19 @@ def get_data_famafrench(name, start=None, end=None): datasets = {} for i in range(len(file_edges) - 1): - dataset = [d.split() for d in data[(file_edges[i] + 1):file_edges[i + 1]]] + dataset = [d.split() for d in data[(file_edges[i] + 1): + file_edges[i + 1]]] if(len(dataset) > 10): ncol = np.median(np.array([len(d) for d in dataset])) - header_index = np.where(np.array([len(d) for d in dataset]) == (ncol - 1))[0][-1] + header_index = np.where( + np.array([len(d) for d in dataset]) == (ncol - 1))[0][-1] header = dataset[header_index] # to ensure the header is unique header = [str(j + 1) + " " + header[j] for j in range(len(header))] - index = np.array([d[0] for d in dataset[(header_index + 1):]], dtype=int) - dataset = np.array([d[1:] for d in dataset[(header_index + 1):]], dtype=float) + index = np.array( + [d[0] for d in dataset[(header_index + 1):]], dtype=int) + dataset = np.array( + [d[1:] for d in dataset[(header_index + 1):]], dtype=float) datasets[i] = DataFrame(dataset, index, columns=header) return datasets diff --git a/pandas/io/ga.py b/pandas/io/ga.py index a433a4add7478..bcaf6bd6ec758 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -86,6 +86,7 @@ Local host redirect if unspecified """ + @Substitution(extras=_AUTH_PARAMS) @Appender(_GA_READER_DOC) def read_ga(metrics, dimensions, start_date, **kwargs): @@ -95,6 +96,7 @@ def read_ga(metrics, dimensions, start_date, **kwargs): return reader.get_data(metrics=metrics, start_date=start_date, dimensions=dimensions, **kwargs) + class OAuthDataReader(object): """ Abstract class for handling OAuth2 authentication using the Google @@ -280,7 +282,6 @@ def _read(start, result_size): raise ValueError('Google API error %s: %s' % (inst.resp.status, inst._get_reason())) - if chunksize is None: return _read(start_index, max_results) @@ -333,7 +334,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, max_results=10000, **kwargs): if isinstance(metrics, basestring): metrics = [metrics] - met =','.join(['ga:%s' % x for x in metrics]) + met = ','.join(['ga:%s' % x for x in metrics]) start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d') if end_date is None: @@ -358,6 +359,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, return qry + def _maybe_add_arg(query, field, data): if data is not None: if isinstance(data, basestring): @@ -365,6 +367,7 @@ def _maybe_add_arg(query, field, data): data = ','.join(['ga:%s' % x for x in data]) query[field] = data + def _get_match(obj_store, name, id, **kwargs): key, val = None, None if len(kwargs) > 0: @@ -385,6 +388,7 @@ def _get_match(obj_store, name, id, 
**kwargs): if name_ok(item) or id_ok(item) or key_ok(item): return item + def _clean_index(index_dims, parse_dates): _should_add = lambda lst: pd.Index(lst).isin(index_dims).all() to_remove = [] @@ -413,17 +417,21 @@ def _clean_index(index_dims, parse_dates): def _get_col_names(header_info): return [x['name'][3:] for x in header_info] + def _get_column_types(header_info): return [(x['name'][3:], x['columnType']) for x in header_info] + def _get_dim_names(header_info): return [x['name'][3:] for x in header_info if x['columnType'] == u'DIMENSION'] + def _get_met_names(header_info): return [x['name'][3:] for x in header_info if x['columnType'] == u'METRIC'] + def _get_data_types(header_info): return [(x['name'][3:], TYPE_MAP.get(x['dataType'], object)) for x in header_info] diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c24289fe2d063..c6a904b931c98 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -168,6 +168,7 @@ def _is_url(url): else: return False + def _read(filepath_or_buffer, kwds): "Generic reader of line files." encoding = kwds.get('encoding', None) @@ -412,7 +413,7 @@ def read_fwf(filepath_or_buffer, colspecs=None, widths=None, **kwds): if widths is not None: colspecs, col = [], 0 for w in widths: - colspecs.append((col, col+w)) + colspecs.append((col, col + w)) col += w kwds['colspecs'] = colspecs @@ -420,7 +421,6 @@ def read_fwf(filepath_or_buffer, colspecs=None, widths=None, **kwds): return _read(filepath_or_buffer, kwds) - def read_clipboard(**kwargs): # pragma: no cover """ Read text from clipboard and pass to read_table. See read_table for the @@ -647,6 +647,7 @@ def _create_index(self, col_dict, columns): def _is_index_col(col): return col is not None and col is not False + class ParserBase(object): def __init__(self, kwds): @@ -786,7 +787,6 @@ def _agg_index(self, index, try_parse_dates=True): return index - def _convert_to_ndarrays(self, dct, na_values, verbose=False, converters=None): result = {} @@ -904,7 +904,7 @@ def __init__(self, src, **kwds): if not self._has_complex_date_col: if (self._reader.leading_cols == 0 and - _is_index_col(self.index_col)): + _is_index_col(self.index_col)): self._name_processed = True (self.index_names, self.names, @@ -1022,7 +1022,6 @@ def _maybe_parse_dates(self, values, index, try_parse_dates=True): return values - def TextParser(*args, **kwds): """ Converts lists of lists/tuples into DataFrames with proper type inference @@ -1085,6 +1084,7 @@ def TextParser(*args, **kwds): def count_empty_vals(vals): return sum([1 for v in vals if v == '' or v is None]) + def _wrap_compressed(f, compression): compression = compression.lower() if compression == 'gzip': @@ -1096,6 +1096,7 @@ def _wrap_compressed(f, compression): raise ValueError('do not recognize compression method %s' % compression) + class PythonParser(ParserBase): def __init__(self, f, **kwds): @@ -1136,7 +1137,6 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - if isinstance(f, basestring): f = com._get_handle(f, 'r', encoding=self.encoding, compression=self.compression) @@ -1243,13 +1243,13 @@ def read(self, rows=None): self._first_chunk = False columns = list(self.orig_names) - if len(content) == 0: # pragma: no cover + if len(content) == 0: # pragma: no cover # DataFrame with the right metadata, even though it's length 0 return _get_empty_meta(self.orig_names, self.index_col, self.index_names) - #handle new style for names in index + # handle new style for names in index count_empty_content_vals = 
count_empty_vals(content[0]) indexnamerow = None if self.has_index_names and count_empty_content_vals == len(columns): @@ -1366,7 +1366,7 @@ def _check_comments(self, lines): rl = [] for x in l: if (not isinstance(x, basestring) or - self.comment not in x): + self.comment not in x): rl.append(x) else: x = x[:x.find(self.comment)] @@ -1386,7 +1386,7 @@ def _check_thousands(self, lines): for x in l: if (not isinstance(x, basestring) or self.thousands not in x or - nonnum.search(x.strip())): + nonnum.search(x.strip())): rl.append(x) else: rl.append(x.replace(',', '')) @@ -1707,7 +1707,6 @@ def _get_na_values(col, na_values): return na_values - def _get_col_names(colspec, columns): colset = set(columns) colnames = [] @@ -1859,7 +1858,7 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, # has_index_names: boolean, default False # True if the cols defined in index_col have an index name and are # not in the header - has_index_names=False # removed as new argument of API function + has_index_names = False # removed as new argument of API function skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: @@ -1892,13 +1891,13 @@ def _range2cols(areas): """ def _excel2num(x): "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s,a: s*26+ord(a)-ord('A')+1, x.upper().strip(), 0)-1 + return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, x.upper().strip(), 0) - 1 cols = [] for rng in areas.split(','): if ':' in rng: rng = rng.split(':') - cols += range(_excel2num(rng[0]), _excel2num(rng[1])+1) + cols += range(_excel2num(rng[0]), _excel2num(rng[1]) + 1) else: cols.append(_excel2num(rng)) return cols @@ -1968,7 +1967,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, if typ == XL_CELL_DATE: dt = xldate_as_tuple(value, datemode) # how to produce this first case? 
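# Answering the question above: xlrd's xldate_as_tuple returns
# (0, 0, 0, H, M, S) for a cell holding a bare time of day (a serial value
# below 1), so dt[0] drops below datetime.MINYEAR (== 1) exactly for
# time-only cells. The branch in isolation (_xl_tuple_to_value is a
# hypothetical helper, not part of this module):
import datetime

def _xl_tuple_to_value(dt):
    if dt[0] < datetime.MINYEAR:       # year 0 means a time-only cell
        return datetime.time(*dt[3:])
    return datetime.datetime(*dt)

# _xl_tuple_to_value((0, 0, 0, 9, 30, 0))    -> datetime.time(9, 30)
# _xl_tuple_to_value((2013, 1, 2, 0, 0, 0))  -> datetime.datetime(2013, 1, 2, 0, 0)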
- if dt[0] < datetime.MINYEAR: # pragma: no cover + if dt[0] < datetime.MINYEAR: # pragma: no cover value = datetime.time(*dt[3:]) else: value = datetime.datetime(*dt) @@ -2022,6 +2021,7 @@ def to_xls(style_dict, num_format_str=None): style_dict: style dictionary to convert """ import xlwt + def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): """helper wich recursively generate an xlwt easy style string for example: @@ -2079,7 +2079,7 @@ def to_xlsx(style_dict): for nk, nv in value.items(): if key == "borders": (xls_style.borders.__getattribute__(nk) - .__setattr__('border_style', nv)) + .__setattr__('border_style', nv)) else: xls_style.__getattribute__(key).__setattr__(nk, nv) @@ -2087,7 +2087,7 @@ def to_xlsx(style_dict): def _conv_value(val): - #convert value for excel dump + # convert value for excel dump if isinstance(val, np.int64): val = int(val) elif isinstance(val, np.bool8): @@ -2115,12 +2115,12 @@ def __init__(self, path): import xlwt self.book = xlwt.Workbook() self.fm_datetime = xlwt.easyxf( - num_format_str='YYYY-MM-DD HH:MM:SS') + num_format_str='YYYY-MM-DD HH:MM:SS') self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') else: from openpyxl.workbook import Workbook - self.book = Workbook()#optimized_write=True) - #open pyxl 1.6.1 adds a dummy sheet remove it + self.book = Workbook() # optimized_write=True) + # open pyxl 1.6.1 adds a dummy sheet remove it if self.book.worksheets: self.book.remove_sheet(self.book.worksheets[0]) self.path = path @@ -2175,15 +2175,15 @@ def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): style = CellStyleConverter.to_xlsx(cell.style) for field in style.__fields__: xcell.style.__setattr__(field, - style.__getattribute__(field)) + style.__getattribute__(field)) if isinstance(cell.val, datetime.datetime): xcell.style.number_format.format_code = "YYYY-MM-DD HH:MM:SS" elif isinstance(cell.val, datetime.date): xcell.style.number_format.format_code = "YYYY-MM-DD" - #merging requires openpyxl latest (works on 1.6.1) - #todo add version check + # merging requires openpyxl latest (works on 1.6.1) + # todo add version check if cell.mergestart is not None and cell.mergeend is not None: cletterstart = get_column_letter(startcol + cell.col + 1) cletterend = get_column_letter(startcol + cell.mergeend + 1) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a4cf413269c49..1469620ea01f2 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -37,7 +37,9 @@ # versioning attribute _version = '0.10.1' -class IncompatibilityWarning(Warning): pass + +class IncompatibilityWarning(Warning): + pass # reading and writing the full object in one go _TYPE_MAP = { @@ -47,7 +49,7 @@ class IncompatibilityWarning(Warning): pass DataFrame: 'frame', SparseDataFrame: 'sparse_frame', Panel: 'wide', - Panel4D : 'ndim', + Panel4D: 'ndim', SparsePanel: 'sparse_panel' } @@ -80,15 +82,16 @@ class IncompatibilityWarning(Warning): pass # axes map _AXES_MAP = { - DataFrame : [0], - Panel : [1,2], - Panel4D : [1,2,3], + DataFrame: [0], + Panel: [1, 2], + Panel4D: [1, 2, 3], } # oh the troubles to reduce import time _table_mod = None _table_supports_index = False + def _tables(): global _table_mod global _table_supports_index @@ -106,6 +109,7 @@ def _tables(): return _table_mod + @contextmanager def get_store(path, mode='a', complevel=None, complib=None, fletcher32=False): @@ -212,7 +216,7 @@ def __contains__(self, key): node = self.get_node(key) if node is not None: name = node._v_pathname - return re.search(key,name) is not None + 
return re.search(key, name) is not None return False def __len__(self): @@ -223,10 +227,10 @@ def __repr__(self): groups = self.groups() if len(groups) > 0: - keys = [] + keys = [] values = [] - for n in sorted(groups, key = lambda x: x._v_name): - kind = getattr(n._v_attrs,'pandas_type',None) + for n in sorted(groups, key=lambda x: x._v_name): + kind = getattr(n._v_attrs, 'pandas_type', None) keys.append(str(n._v_pathname)) @@ -253,7 +257,7 @@ def keys(self): Return a (potentially unordered) list of the keys corresponding to the objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. have the leading '/' """ - return [ n._v_pathname for n in self.groups() ] + return [n._v_pathname for n in self.groups()] def open(self, mode='a', warn=True): """ @@ -355,7 +359,7 @@ def select_as_coordinates(self, key, where=None, **kwargs): ------------------- where : list of Term (or convertable) objects, optional """ - return self.get_table(key).read_coordinates(where = where, **kwargs) + return self.get_table(key).read_coordinates(where=where, **kwargs) def unique(self, key, column, **kwargs): """ @@ -372,7 +376,7 @@ def unique(self, key, column, **kwargs): raises ValueError if the column can not be extracted indivually (it is part of a data block) """ - return self.get_table(key).read_column(column = column, **kwargs) + return self.get_table(key).read_column(column=column, **kwargs) def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kwargs): """ Retrieve pandas objects from multiple tables @@ -389,12 +393,12 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw """ # default to single select - if isinstance(keys, (list,tuple)) and len(keys) == 1: + if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] - if isinstance(keys,basestring): - return self.select(key = keys, where=where, columns = columns, **kwargs) + if isinstance(keys, basestring): + return self.select(key=keys, where=where, columns=columns, **kwargs) - if not isinstance(keys, (list,tuple)): + if not isinstance(keys, (list, tuple)): raise Exception("keys must be a list/tuple") if len(keys) == 0: @@ -404,7 +408,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw selector = keys[0] # collect the tables - tbls = [ self.get_table(k) for k in keys ] + tbls = [self.get_table(k) for k in keys] # validate rows nrows = tbls[0].nrows @@ -416,13 +420,13 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw c = self.select_as_coordinates(selector, where) # collect the returns objs - objs = [ t.read(where = c, columns = columns) for t in tbls ] + objs = [t.read(where=c, columns=columns) for t in tbls] # axis is the concentation axes - axis = list(set([ t.non_index_axes[0][0] for t in tbls ]))[0] + axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0] # concat and return - return concat(objs, axis = axis, verify_integrity = True) + return concat(objs, axis=axis, verify_integrity=True) def put(self, key, value, table=False, append=False, **kwargs): """ @@ -475,11 +479,11 @@ def remove(self, key, where=None, start=None, stop=None): if not _is_table_type(group): raise Exception('can only remove with where on objects written as tables') t = create_table(self, group) - return t.delete(where = where, start=start, stop=stop) + return t.delete(where=where, start=start, stop=stop) return None - def append(self, key, value, columns = None, **kwargs): + def append(self, key, value, columns=None, **kwargs): """ 
Append to Table in file. Node must already exist and be Table format. @@ -507,16 +511,18 @@ def append(self, key, value, columns = None, **kwargs): self._write_to_group(key, value, table=True, append=True, **kwargs) - def append_to_multiple(self, d, value, selector, data_columns = None, axes = None, **kwargs): + def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, **kwargs): """ Append to multiple tables Parameters ---------- - d : a dict of table_name to table_columns, None is acceptable as the values of one node (this will get all the remaining columns) + d : a dict of table_name to table_columns, None is acceptable as the values of + one node (this will get all the remaining columns) value : a pandas object - selector : a string that designates the indexable table; all of its columns will be designed as data_columns, unless data_columns is passed, - in which case these are used + selector : a string that designates the indexable table; all of its columns will + be designed as data_columns, unless data_columns is passed, in which + case these are used Notes ----- @@ -533,7 +539,7 @@ def append_to_multiple(self, d, value, selector, data_columns = None, axes = Non raise Exception("append_to_multiple requires a selector that is in passed dict") # figure out the splitting axis (the non_index_axis) - axis = list(set(range(value.ndim))-set(_AXES_MAP[type(value)]))[0] + axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0] # figure out how to split the value remain_key = None @@ -547,7 +553,7 @@ def append_to_multiple(self, d, value, selector, data_columns = None, axes = Non remain_values.extend(v) if remain_key is not None: ordered = value.axes[axis] - ordd = ordered-Index(remain_values) + ordd = ordered - Index(remain_values) ordd = sorted(ordered.get_indexer(ordd)) d[remain_key] = ordered.take(ordd) @@ -560,9 +566,9 @@ def append_to_multiple(self, d, value, selector, data_columns = None, axes = Non dc = data_columns if k == selector else None # compute the val - val = value.reindex_axis(v, axis = axis, copy = False) + val = value.reindex_axis(v, axis=axis, copy=False) - self.append(k, val, data_columns = dc, **kwargs) + self.append(k, val, data_columns=dc, **kwargs) def create_table_index(self, key, **kwargs): """ Create a pytables index on the table @@ -582,7 +588,8 @@ def create_table_index(self, key, **kwargs): raise Exception("PyTables >= 2.3 is required for table indexing") group = self.get_node(key) - if group is None: return + if group is None: + return if not _is_table_type(group): raise Exception("cannot create table index on a non-table") @@ -590,14 +597,14 @@ def create_table_index(self, key, **kwargs): def groups(self): """ return a list of all the groups (that are not themselves a pandas storage object) """ - return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ] + return [g for g in self.handle.walkGroups() if getattr(g._v_attrs, 'pandas_type', None)] def get_node(self, key): """ return the node with the key or None if it does not exist """ try: if not key.startswith('/'): key = '/' + key - return self.handle.getNode(self.root,key) + return self.handle.getNode(self.root, key) except: return None @@ -635,7 +642,7 @@ def _write_to_group(self, key, value, table=False, append=False, group = self.get_node(new_path) if group is None: group = self.handle.createGroup(path, p) - path = new_path + path = new_path kind = _TYPE_MAP[type(value)] if table or (append and _is_table_type(group)): @@ -784,10 +791,10 @@ 
def _read_wide(self, group, where=None, **kwargs): def _write_ndim_table(self, group, obj, append=False, axes=None, index=True, **kwargs): if axes is None: axes = _AXES_MAP[type(obj)] - t = create_table(self, group, typ = 'appendable_ndim') + t = create_table(self, group, typ='appendable_ndim') t.write(axes=axes, obj=obj, append=append, **kwargs) if index: - t.create_index(columns = index) + t.create_index(columns=index) def _read_ndim_table(self, group, where=None, **kwargs): t = create_table(self, group, **kwargs) @@ -797,20 +804,20 @@ def _write_frame_table(self, group, df, append=False, axes=None, index=True, **k if axes is None: axes = _AXES_MAP[type(df)] - t = create_table(self, group, typ = 'appendable_frame' if df.index.nlevels == 1 else 'appendable_multiframe') + t = create_table(self, group, typ='appendable_frame' if df.index.nlevels == 1 else 'appendable_multiframe') t.write(axes=axes, obj=df, append=append, **kwargs) if index: - t.create_index(columns = index) + t.create_index(columns=index) _read_frame_table = _read_ndim_table def _write_wide_table(self, group, panel, append=False, axes=None, index=True, **kwargs): if axes is None: axes = _AXES_MAP[type(panel)] - t = create_table(self, group, typ = 'appendable_panel') + t = create_table(self, group, typ='appendable_panel') t.write(axes=axes, obj=panel, append=append, **kwargs) if index: - t.create_index(columns = index) + t.create_index(columns=index) _read_wide_table = _read_ndim_table @@ -1028,6 +1035,7 @@ def _read_index_legacy(self, group, key): kind = node._v_attrs.kind return _unconvert_index_legacy(data, kind) + class IndexCol(object): """ an index column description class @@ -1041,30 +1049,31 @@ class IndexCol(object): pos : the position in the pytables """ - is_an_indexable = True + is_an_indexable = True is_data_indexable = True - is_searchable = False + is_searchable = False - def __init__(self, values = None, kind = None, typ = None, cname = None, itemsize = None, name = None, axis = None, kind_attr = None, pos = None, **kwargs): + def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None, + name=None, axis=None, kind_attr=None, pos=None, **kwargs): self.values = values - self.kind = kind - self.typ = typ + self.kind = kind + self.typ = typ self.itemsize = itemsize - self.name = name - self.cname = cname + self.name = name + self.cname = cname self.kind_attr = kind_attr - self.axis = axis - self.pos = pos - self.table = None + self.axis = axis + self.pos = pos + self.table = None if name is not None: self.set_name(name, kind_attr) if pos is not None: self.set_pos(pos) - def set_name(self, name, kind_attr = None): + def set_name(self, name, kind_attr=None): """ set the name of this indexer """ - self.name = name + self.name = name self.kind_attr = kind_attr or "%s_kind" % name if self.cname is None: self.cname = name @@ -1073,13 +1082,13 @@ def set_name(self, name, kind_attr = None): def set_axis(self, axis): """ set the axis over which I index """ - self.axis = axis + self.axis = axis return self def set_pos(self, pos): """ set the position of this column in the Table """ - self.pos = pos + self.pos = pos if pos is not None and self.typ is not None: self.typ._v_pos = pos return self @@ -1089,13 +1098,13 @@ def set_table(self, table): return self def __repr__(self): - return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % (self.name,self.cname,self.axis,self.pos,self.kind) + return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % (self.name, self.cname, self.axis, self.pos, self.kind) __str__ = 
__repr__ def __eq__(self, other): """ compare 2 col items """ - return all([ getattr(self,a,None) == getattr(other,a,None) for a in ['name','cname','axis','pos'] ]) + return all([getattr(self, a, None) == getattr(other, a, None) for a in ['name', 'cname', 'axis', 'pos']]) def __ne__(self, other): return not self.__eq__(other) @@ -1136,7 +1145,7 @@ def description(self): @property def col(self): """ return my current col description """ - return getattr(self.description,self.cname,None) + return getattr(self.description, self.cname, None) @property def cvalues(self): @@ -1146,7 +1155,7 @@ def cvalues(self): def __iter__(self): return iter(self.values) - def maybe_set_size(self, min_itemsize = None, **kwargs): + def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe set a string col itemsize: min_itemsize can be an interger or a dict with this columns name with an integer size """ if self.kind == 'string': @@ -1155,7 +1164,8 @@ def maybe_set_size(self, min_itemsize = None, **kwargs): min_itemsize = min_itemsize.get(self.name) if min_itemsize is not None and self.typ.itemsize < min_itemsize: - self.typ = _tables().StringCol(itemsize = min_itemsize, pos = self.pos) + self.typ = _tables( + ).StringCol(itemsize=min_itemsize, pos=self.pos) def validate_and_set(self, table, append, **kwargs): self.set_table(table) @@ -1163,11 +1173,11 @@ def validate_and_set(self, table, append, **kwargs): self.validate_attr(append) self.set_attr() - def validate_col(self, itemsize = None): + def validate_col(self, itemsize=None): """ validate this column: return the compared against itemsize """ # validate this column for string truncation (or reset to the max size) - dtype = getattr(self,'dtype',None) + dtype = getattr(self, 'dtype', None) if self.kind == 'string': c = self.col @@ -1175,27 +1185,28 @@ def validate_col(self, itemsize = None): if itemsize is None: itemsize = self.itemsize if c.itemsize < itemsize: - raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" % (self.cname,itemsize,c.itemsize)) + raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" 
+ % (self.cname, itemsize, c.itemsize)) return c.itemsize return None - def validate_attr(self, append): # check for backwards incompatibility if append: - existing_kind = getattr(self.attrs,self.kind_attr,None) + existing_kind = getattr(self.attrs, self.kind_attr, None) if existing_kind is not None and existing_kind != self.kind: raise TypeError("incompatible kind in col [%s - %s]" % (existing_kind, self.kind)) def get_attr(self): """ set the kind for this colummn """ - self.kind = getattr(self.attrs,self.kind_attr,None) + self.kind = getattr(self.attrs, self.kind_attr, None) def set_attr(self): """ set the kind for this colummn """ - setattr(self.attrs,self.kind_attr,self.kind) + setattr(self.attrs, self.kind_attr, self.kind) + class DataCol(IndexCol): """ a data holding column, by definition this is not indexable @@ -1206,44 +1217,46 @@ class DataCol(IndexCol): data : the actual data cname : the column name in the table to hold the data (typeically values) """ - is_an_indexable = False + is_an_indexable = False is_data_indexable = False - is_searchable = False + is_searchable = False @classmethod - def create_for_block(cls, i = None, name = None, cname = None, version = None, **kwargs): + def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs): """ return a new datacol with the block i """ if cname is None: cname = name or 'values_block_%d' % i if name is None: - name = cname + name = cname - # prior to 0.10.1, we named values blocks like: values_block_0 an the name values_0 + # prior to 0.10.1, we named values blocks like: values_block_0 an the + # name values_0 try: if version[0] == 0 and version[1] <= 10 and version[2] == 0: - m = re.search("values_block_(\d+)",name) + m = re.search("values_block_(\d+)", name) if m: name = "values_%s" % m.groups()[0] except: pass - return cls(name = name, cname = cname, **kwargs) + return cls(name=name, cname=cname, **kwargs) - def __init__(self, values = None, kind = None, typ = None, cname = None, data = None, block = None, **kwargs): - super(DataCol, self).__init__(values = values, kind = kind, typ = typ, cname = cname, **kwargs) + def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, block=None, **kwargs): + super(DataCol, self).__init__( + values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None self.dtype_attr = "%s_dtype" % self.name self.set_data(data) def __repr__(self): - return "name->%s,cname->%s,dtype->%s,shape->%s" % (self.name,self.cname,self.dtype,self.shape) + return "name->%s,cname->%s,dtype->%s,shape->%s" % (self.name, self.cname, self.dtype, self.shape) def __eq__(self, other): """ compare 2 col items """ - return all([ getattr(self,a,None) == getattr(other,a,None) for a in ['name','cname','dtype','pos'] ]) + return all([getattr(self, a, None) == getattr(other, a, None) for a in ['name', 'cname', 'dtype', 'pos']]) - def set_data(self, data, dtype = None): + def set_data(self, data, dtype=None): self.data = data if data is not None: if dtype is not None: @@ -1273,18 +1286,21 @@ def set_kind(self): def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs): """ create and setup my atom from the block b """ - self.values = list(block.items) - dtype = block.dtype.name + self.values = list(block.items) + dtype = block.dtype.name inferred_type = lib.infer_dtype(block.values.flatten()) if inferred_type == 'datetime64': self.set_atom_datetime64(block) elif inferred_type == 'date': - raise NotImplementedError("date is not implemented as a table column") + raise 
NotImplementedError( + "date is not implemented as a table column") elif inferred_type == 'unicode': - raise NotImplementedError("unicode is not implemented as a table column") + raise NotImplementedError( + "unicode is not implemented as a table column") - ### this is basically a catchall; if say a datetime64 has nans then will end up here ### + # this is basically a catchall; if say a datetime64 has nans then will + # end up here ### elif inferred_type == 'string' or dtype == 'object': self.set_atom_string(block, existing_col, min_itemsize, nan_rep) else: @@ -1293,7 +1309,7 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs): return self def get_atom_string(self, block, itemsize): - return _tables().StringCol(itemsize = itemsize, shape = block.shape[0]) + return _tables().StringCol(itemsize=itemsize, shape=block.shape[0]) def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): # fill nan items with myself @@ -1304,8 +1320,9 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): # specified min_itemsize? if isinstance(min_itemsize, dict): - min_itemsize = int(min_itemsize.get(self.name) or min_itemsize.get('values') or 0) - itemsize = max(min_itemsize or 0,itemsize) + min_itemsize = int(min_itemsize.get( + self.name) or min_itemsize.get('values') or 0) + itemsize = max(min_itemsize or 0, itemsize) # check for column in the values conflicts if existing_col is not None: @@ -1314,32 +1331,32 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): itemsize = eci self.itemsize = itemsize - self.kind = 'string' - self.typ = self.get_atom_string(block, itemsize) + self.kind = 'string' + self.typ = self.get_atom_string(block, itemsize) self.set_data(self.convert_string_data(data, itemsize)) def convert_string_data(self, data, itemsize): return data.astype('S%s' % itemsize) def get_atom_data(self, block): - return getattr(_tables(),"%sCol" % self.kind.capitalize())(shape = block.shape[0]) + return getattr(_tables(), "%sCol" % self.kind.capitalize())(shape=block.shape[0]) def set_atom_data(self, block): - self.kind = block.dtype.name - self.typ = self.get_atom_data(block) + self.kind = block.dtype.name + self.typ = self.get_atom_data(block) self.set_data(block.values.astype(self.typ._deftype)) def get_atom_datetime64(self, block): - return _tables().Int64Col(shape = block.shape[0]) + return _tables().Int64Col(shape=block.shape[0]) def set_atom_datetime64(self, block): - self.kind = 'datetime64' - self.typ = self.get_atom_datetime64(block) - self.set_data(block.values.view('i8'),'datetime64') + self.kind = 'datetime64' + self.typ = self.get_atom_datetime64(block) + self.set_data(block.values.view('i8'), 'datetime64') @property def shape(self): - return getattr(self.data,'shape',None) + return getattr(self.data, 'shape', None) @property def cvalues(self): @@ -1351,13 +1368,13 @@ def validate_attr(self, append): if append: existing_fields = getattr(self.attrs, self.kind_attr, None) if (existing_fields is not None and - existing_fields != list(self.values)): + existing_fields != list(self.values)): raise Exception("appended items do not match existing items" " in table!") existing_dtype = getattr(self.attrs, self.dtype_attr, None) if (existing_dtype is not None and - existing_dtype != self.dtype): + existing_dtype != self.dtype): raise Exception("appended items dtype do not match existing items dtype" " in table!") @@ -1376,10 +1393,12 @@ def convert(self, values, nan_rep): if self.dtype == 'datetime64': self.data = 
np.asarray(self.data, dtype='M8[ns]') elif self.dtype == 'date': - self.data = np.array([date.fromtimestamp(v) for v in self.data], dtype=object) + self.data = np.array( + [date.fromtimestamp(v) for v in self.data], dtype=object) elif self.dtype == 'datetime': - self.data = np.array([datetime.fromtimestamp(v) for v in self.data], - dtype=object) + self.data = np.array( + [datetime.fromtimestamp(v) for v in self.data], + dtype=object) else: try: @@ -1389,20 +1408,22 @@ def convert(self, values, nan_rep): # convert nans if self.kind == 'string': - self.data = lib.array_replace_from_nan_rep(self.data.flatten(), nan_rep).reshape(self.data.shape) + self.data = lib.array_replace_from_nan_rep( + self.data.flatten(), nan_rep).reshape(self.data.shape) return self def get_attr(self): """ get the data for this colummn """ - self.values = getattr(self.attrs,self.kind_attr,None) - self.dtype = getattr(self.attrs,self.dtype_attr,None) + self.values = getattr(self.attrs, self.kind_attr, None) + self.dtype = getattr(self.attrs, self.dtype_attr, None) self.set_kind() def set_attr(self): """ set the data for this colummn """ - setattr(self.attrs,self.kind_attr,self.values) + setattr(self.attrs, self.kind_attr, self.values) if self.dtype is not None: - setattr(self.attrs,self.dtype_attr,self.dtype) + setattr(self.attrs, self.dtype_attr, self.dtype) + class DataIndexableCol(DataCol): """ represent a data column that can be indexed """ @@ -1413,14 +1434,15 @@ def is_searchable(self): return self.kind == 'string' def get_atom_string(self, block, itemsize): - return _tables().StringCol(itemsize = itemsize) + return _tables().StringCol(itemsize=itemsize) def get_atom_data(self, block): - return getattr(_tables(),"%sCol" % self.kind.capitalize())() + return getattr(_tables(), "%sCol" % self.kind.capitalize())() def get_atom_datetime64(self, block): return _tables().Int64Col() + class Table(object): """ represent a table: facilitate read/write of various types of tables @@ -1446,29 +1468,29 @@ class Table(object): """ table_type = None - obj_type = None - ndim = None - levels = 1 + obj_type = None + ndim = None + levels = 1 def __init__(self, parent, group, **kwargs): - self.parent = parent - self.group = group + self.parent = parent + self.group = group # compute our version - version = getattr(group._v_attrs,'pandas_version',None) + version = getattr(group._v_attrs, 'pandas_version', None) try: - self.version = tuple([ int(x) for x in version.split('.') ]) + self.version = tuple([int(x) for x in version.split('.')]) if len(self.version) == 2: self.version = self.version + (0,) except: - self.version = (0,0,0) + self.version = (0, 0, 0) - self.index_axes = [] + self.index_axes = [] self.non_index_axes = [] - self.values_axes = [] - self.data_columns = [] - self.nan_rep = None - self.selection = None + self.values_axes = [] + self.data_columns = [] + self.nan_rep = None + self.selection = None @property def table_type_short(self): @@ -1476,17 +1498,18 @@ def table_type_short(self): @property def pandas_type(self): - return getattr(self.group._v_attrs,'pandas_type',None) + return getattr(self.group._v_attrs, 'pandas_type', None) def __repr__(self): """ return a pretty representatgion of myself """ self.infer_axes() - dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else '' + dc = ",dc->[%s]" % ','.join( + self.data_columns) if len(self.data_columns) else '' return "%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type, - self.table_type_short, - self.nrows, - ','.join([ a.name for a in 
class Table(object):
     """ represent a table:
           facilitate read/write of various types of tables
@@ -1446,29 +1468,29 @@ class Table(object):
     """
     table_type = None
-    obj_type   = None
-    ndim       = None
-    levels     = 1
+    obj_type = None
+    ndim = None
+    levels = 1

     def __init__(self, parent, group, **kwargs):
-        self.parent      = parent
-        self.group       = group
+        self.parent = parent
+        self.group = group

         # compute our version
-        version = getattr(group._v_attrs,'pandas_version',None)
+        version = getattr(group._v_attrs, 'pandas_version', None)
         try:
-            self.version = tuple([ int(x) for x in version.split('.') ])
+            self.version = tuple([int(x) for x in version.split('.')])
             if len(self.version) == 2:
                 self.version = self.version + (0,)
         except:
-            self.version = (0,0,0)
+            self.version = (0, 0, 0)

-        self.index_axes     = []
+        self.index_axes = []
         self.non_index_axes = []
-        self.values_axes    = []
-        self.data_columns   = []
-        self.nan_rep        = None
-        self.selection      = None
+        self.values_axes = []
+        self.data_columns = []
+        self.nan_rep = None
+        self.selection = None

     @property
     def table_type_short(self):
@@ -1476,17 +1498,18 @@ def table_type_short(self):

     @property
     def pandas_type(self):
-        return getattr(self.group._v_attrs,'pandas_type',None)
+        return getattr(self.group._v_attrs, 'pandas_type', None)

     def __repr__(self):
         """ return a pretty representation of myself """
         self.infer_axes()
-        dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else ''
+        dc = ",dc->[%s]" % ','.join(
+            self.data_columns) if len(self.data_columns) else ''
         return "%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type,
-                                                            self.table_type_short,
-                                                            self.nrows,
-                                                            ','.join([ a.name for a in self.index_axes ]),
-                                                            dc)
+                                                            self.table_type_short,
+                                                            self.nrows,
+                                                            ','.join([a.name for a in self.index_axes]),
+                                                            dc)

     __str__ = __repr__

@@ -1496,24 +1519,26 @@ def copy(self):

     def validate(self, other):
         """ validate against an existing table """
-        if other is None: return
+        if other is None:
+            return

         if other.table_type != self.table_type:
             raise TypeError("incompatible table_type with existing [%s - %s]" %
                             (other.table_type, self.table_type))

-        for c in ['index_axes','non_index_axes','values_axes']:
-            if getattr(self,c,None) != getattr(other,c,None):
-                raise Exception("invalid combination of [%s] on appending data [%s] vs current table [%s]" % (c,getattr(self,c,None),getattr(other,c,None)))
+        for c in ['index_axes', 'non_index_axes', 'values_axes']:
+            if getattr(self, c, None) != getattr(other, c, None):
+                raise Exception("invalid combination of [%s] on appending data [%s] vs current table [%s]"
+                                % (c, getattr(self, c, None), getattr(other, c, None)))

     @property
     def nrows(self):
-        return getattr(self.table,'nrows',None)
+        return getattr(self.table, 'nrows', None)

     @property
     def nrows_expected(self):
         """ based on our axes, compute the expected nrows """
-        return np.prod([ i.cvalues.shape[0] for i in self.index_axes ])
+        return np.prod([i.cvalues.shape[0] for i in self.index_axes])

     @property
     def table(self):
@@ -1563,66 +1588,69 @@ def is_transposed(self):

     @property
     def data_orientation(self):
         """ return a tuple of my permuted axes, non_indexable at the front """
-        return tuple(itertools.chain([ a[0] for a in self.non_index_axes ], [ a.axis for a in self.index_axes ]))
+        return tuple(itertools.chain([a[0] for a in self.non_index_axes], [a.axis for a in self.index_axes]))

     def queryables(self):
         """ return a dict of the kinds allowable columns for this object """

         # compute the values_axes queryables
-        return dict([ (a.cname,a.kind) for a in self.index_axes ] +
-                    [ (self.obj_type._AXIS_NAMES[axis],None) for axis, values in self.non_index_axes ] +
-                    [ (v.cname,v.kind) for v in self.values_axes if v.name in set(self.data_columns) ]
+        return dict([(a.cname, a.kind) for a in self.index_axes] +
+                    [(self.obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
+                    [(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)]
                     )

     def index_cols(self):
         """ return a list of my index cols """
-        return [ (i.axis,i.cname) for i in self.index_axes ]
+        return [(i.axis, i.cname) for i in self.index_axes]

     def values_cols(self):
         """ return a list of my values cols """
-        return [ i.cname for i in self.values_axes ]
+        return [i.cname for i in self.values_axes]

     def set_attrs(self):
         """ set our table type & indexables """
-        self.attrs.table_type     = self.table_type
-        self.attrs.index_cols     = self.index_cols()
+        self.attrs.table_type = self.table_type
+        self.attrs.index_cols = self.index_cols()
         self.attrs.values_cols = self.values_cols()
         self.attrs.non_index_axes = self.non_index_axes
-        self.attrs.data_columns   = self.data_columns
-        self.attrs.nan_rep        = self.nan_rep
-        self.attrs.levels         = self.levels
+        self.attrs.data_columns = self.data_columns
+        self.attrs.nan_rep = self.nan_rep
+        self.attrs.levels = self.levels

-    def validate_version(self, where = None):
+    def validate_version(self, where=None):
         """ are we trying to operate on an old version? """
         if where is not None:
             if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1:
-                warnings.warn("where criteria is being ignored as this version is too old (or not defined) [%s]" % '.'.join([ str(x) for x in self.version ]), IncompatibilityWarning)
+                warnings.warn("where criteria is being ignored as this version is too old (or not defined) [%s]"
+                              % '.'.join([str(x) for x in self.version]), IncompatibilityWarning)
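The version handling in __init__ above normalizes the stored 'pandas_version' attribute to a 3-tuple, falling back to (0, 0, 0) when the attribute is missing or unparseable. A standalone sketch of the same logic (the helper name is hypothetical):

    def _parse_pandas_version(version):
        # mirrors Table.__init__: '0.10.1' -> (0, 10, 1), '0.10' -> (0, 10, 0)
        try:
            v = tuple([int(x) for x in version.split('.')])
            return v + (0,) if len(v) == 2 else v
        except:
            # missing or unparseable attribute (e.g. None on pre-0.10 files)
            return (0, 0, 0)

    assert _parse_pandas_version('0.10.1') == (0, 10, 1)
    assert _parse_pandas_version('0.10') == (0, 10, 0)
    assert _parse_pandas_version(None) == (0, 0, 0)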
""" if where is not None: if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: - warnings.warn("where criteria is being ignored as we this version is too old (or not-defined) [%s]" % '.'.join([ str(x) for x in self.version ]), IncompatibilityWarning) + warnings.warn("where criteria is being ignored as we this version is too old (or not-defined) [%s]" + % '.'.join([str(x) for x in self.version]), IncompatibilityWarning) @property def indexables(self): """ create/cache the indexables if they don't exist """ if self._indexables is None: - d = self.description + d = self.description self._indexables = [] # index columns - self._indexables.extend([ IndexCol(name = name, axis = axis, pos = i) for i, (axis, name) in enumerate(self.attrs.index_cols) ]) + self._indexables.extend([IndexCol(name=name, axis=axis, pos=i) for i, (axis, name) in enumerate(self.attrs.index_cols)]) # values columns dc = set(self.data_columns) base_pos = len(self._indexables) + def f(i, c): klass = DataCol if c in dc: klass = DataIndexableCol - return klass.create_for_block(i = i, name = c, pos = base_pos + i, version = self.version) + return klass.create_for_block(i=i, name=c, pos=base_pos + i, version=self.version) - self._indexables.extend([ f(i,c) for i, c in enumerate(self.attrs.values_cols) ]) + self._indexables.extend( + [f(i, c) for i, c in enumerate(self.attrs.values_cols)]) return self._indexables - def create_index(self, columns = None, optlevel = None, kind = None): + def create_index(self, columns=None, optlevel=None, kind=None): """ Create a pytables index on the specified columns note: cannot index Time64Col() currently; PyTables must be >= 2.3 @@ -1640,31 +1668,33 @@ def create_index(self, columns = None, optlevel = None, kind = None): """ - if not self.infer_axes(): return - if columns is False: return + if not self.infer_axes(): + return + if columns is False: + return # index all indexables and data_columns if columns is None or columns is True: - columns = [ a.cname for a in self.axes if a.is_data_indexable ] - if not isinstance(columns, (tuple,list)): - columns = [ columns ] + columns = [a.cname for a in self.axes if a.is_data_indexable] + if not isinstance(columns, (tuple, list)): + columns = [columns] kw = dict() if optlevel is not None: kw['optlevel'] = optlevel if kind is not None: - kw['kind'] = kind + kw['kind'] = kind table = self.table for c in columns: - v = getattr(table.cols,c,None) + v = getattr(table.cols, c, None) if v is not None: # remove the index if the kind/optlevel have changed if v.is_indexed: index = v.index cur_optlevel = index.optlevel - cur_kind = index.kind + cur_kind = index.kind if kind is not None and cur_kind != kind: v.removeIndex() @@ -1687,15 +1717,16 @@ def read_axes(self, where, **kwargs): self.validate_version(where) # infer the data kind - if not self.infer_axes(): return False + if not self.infer_axes(): + return False # create the selection - self.selection = Selection(self, where = where, **kwargs) + self.selection = Selection(self, where=where, **kwargs) values = self.selection.select() # convert the data for a in self.axes: - a.convert(values, nan_rep = self.nan_rep) + a.convert(values, nan_rep=self.nan_rep) return True @@ -1707,19 +1738,24 @@ def infer_axes(self): if table is None: return False - self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] - self.data_columns = getattr(self.attrs,'data_columns',None) or [] - self.nan_rep = getattr(self.attrs,'nan_rep',None) - self.levels = getattr(self.attrs,'levels',None) or [] - 
@@ -1707,19 +1738,24 @@ def infer_axes(self):
         if table is None:
             return False

-        self.non_index_axes   = getattr(self.attrs,'non_index_axes',None) or []
-        self.data_columns     = getattr(self.attrs,'data_columns',None) or []
-        self.nan_rep          = getattr(self.attrs,'nan_rep',None)
-        self.levels           = getattr(self.attrs,'levels',None) or []
-        self.index_axes       = [ a.infer(self.table) for a in self.indexables if a.is_an_indexable ]
-        self.values_axes      = [ a.infer(self.table) for a in self.indexables if not a.is_an_indexable ]
+        self.non_index_axes = getattr(
+            self.attrs, 'non_index_axes', None) or []
+        self.data_columns = getattr(
+            self.attrs, 'data_columns', None) or []
+        self.nan_rep = getattr(self.attrs, 'nan_rep', None)
+        self.levels = getattr(
+            self.attrs, 'levels', None) or []
+        self.index_axes = [a.infer(
+            self.table) for a in self.indexables if a.is_an_indexable]
+        self.values_axes = [a.infer(
+            self.table) for a in self.indexables if not a.is_an_indexable]

         return True

     def get_object(self, obj):
         """ return the data for this obj """
         return obj

-    def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns = None, min_itemsize = None, **kwargs):
+    def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs):
         """ create and return the axes
             legacy tables create an indexable column, indexable index, non-indexable fields
@@ -1735,24 +1771,24 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns =
         """

         # map axes to numbers
-        axes = [ obj._get_axis_number(a) for a in axes ]
+        axes = [obj._get_axis_number(a) for a in axes]

         # do we have an existing table (if so, use its axes & data_columns)
         if self.infer_axes():
             existing_table = self.copy()
-            axes         = [ a.axis for a in existing_table.index_axes]
+            axes = [a.axis for a in existing_table.index_axes]
             data_columns = existing_table.data_columns
-            nan_rep      = existing_table.nan_rep
+            nan_rep = existing_table.nan_rep
         else:
             existing_table = None

         # currently only support ndim-1 axes
-        if len(axes) != self.ndim-1:
+        if len(axes) != self.ndim - 1:
             raise Exception("currently only support ndim-1 indexers in an AppendableTable")

         # create according to the new data
-        self.non_index_axes   = []
-        self.data_columns     = []
+        self.non_index_axes = []
+        self.data_columns = []

         # nan_representation
         if nan_rep is None:
@@ -1771,10 +1807,12 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns =

             if i in axes:
                 name = obj._AXIS_NAMES[i]
-                index_axes_map[i] = _convert_index(a).set_name(name).set_axis(i)
+                index_axes_map[i] = _convert_index(
+                    a).set_name(name).set_axis(i)
             else:

-                # we might be able to change the axes on the appending data if necessary
+                # we might be able to change the axes on the appending data if
+                # necessary
                 append_axis = list(a)
                 if existing_table is not None:
                     indexer = len(self.non_index_axes)
@@ -1785,33 +1823,36 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns =
                     if sorted(append_axis) == sorted(exist_axis):
                         append_axis = exist_axis

-                self.non_index_axes.append((i,append_axis))
+                self.non_index_axes.append((i, append_axis))

         # set axis positions (based on the axes)
-        self.index_axes       = [ index_axes_map[a].set_pos(j) for j, a in enumerate(axes) ]
+        self.index_axes = [index_axes_map[a].set_pos(j) for j,
+                           a in enumerate(axes)]
         j = len(self.index_axes)

         # check for column conflicts
         if validate:
             for a in self.axes:
-                a.maybe_set_size(min_itemsize = min_itemsize)
+                a.maybe_set_size(min_itemsize=min_itemsize)

         # reindex by our non_index_axes & compute data_columns
         for a in self.non_index_axes:
-            obj = obj.reindex_axis(a[1], axis = a[0], copy = False)
+            obj = obj.reindex_axis(a[1], axis=a[0], copy=False)

         # get out blocks
         block_obj = self.get_object(obj)
-        blocks    = None
+        blocks = None

         if data_columns is not None and len(self.non_index_axes):
-            axis =
self.non_index_axes[0][0] - axis_labels = self.non_index_axes[0][1] - data_columns = [ c for c in data_columns if c in axis_labels ] + axis = self.non_index_axes[0][0] + axis_labels = self.non_index_axes[0][1] + data_columns = [c for c in data_columns if c in axis_labels] if len(data_columns): - blocks = block_obj.reindex_axis(Index(axis_labels)-Index(data_columns), axis = axis, copy = False)._data.blocks + blocks = block_obj.reindex_axis(Index(axis_labels) - Index( + data_columns), axis=axis, copy=False)._data.blocks for c in data_columns: - blocks.extend(block_obj.reindex_axis([ c ], axis = axis, copy = False)._data.blocks) + blocks.extend(block_obj.reindex_axis( + [c], axis=axis, copy=False)._data.blocks) if blocks is None: blocks = block_obj._data.blocks @@ -1821,23 +1862,25 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns = for i, b in enumerate(blocks): # shape of the data column are the indexable axes - klass = DataCol - name = None + klass = DataCol + name = None # we have a data_column if data_columns and len(b.items) == 1 and b.items[0] in data_columns: klass = DataIndexableCol - name = b.items[0] + name = b.items[0] self.data_columns.append(name) try: - existing_col = existing_table.values_axes[i] if existing_table is not None and validate else None - - col = klass.create_for_block(i = i, name = name, version = self.version) - col.set_atom(block = b, - existing_col = existing_col, - min_itemsize = min_itemsize, - nan_rep = nan_rep, + existing_col = existing_table.values_axes[ + i] if existing_table is not None and validate else None + + col = klass.create_for_block( + i=i, name=name, version=self.version) + col.set_atom(block=b, + existing_col=existing_col, + min_itemsize=min_itemsize, + nan_rep=nan_rep, **kwargs) col.set_pos(j) @@ -1845,7 +1888,7 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, data_columns = except (NotImplementedError): raise except (Exception), detail: - raise Exception("cannot find the correct atom type -> [dtype->%s] %s" % (b.dtype.name,str(detail))) + raise Exception("cannot find the correct atom type -> [dtype->%s] %s" % (b.dtype.name, str(detail))) j += 1 # validate the axes if we have an existing table @@ -1856,40 +1899,41 @@ def process_axes(self, obj, columns=None): """ process axes filters """ # reorder by any non_index_axes & limit to the select columns - for axis,labels in self.non_index_axes: + for axis, labels in self.non_index_axes: if columns is not None: labels = Index(labels) & Index(columns) - obj = obj.reindex_axis(labels,axis=axis,copy=False) + obj = obj.reindex_axis(labels, axis=axis, copy=False) def reindex(obj, axis, filt, ordered): ordd = ordered & filt ordd = sorted(ordered.get_indexer(ordd)) - return obj.reindex_axis(ordered.take(ordd), axis = obj._get_axis_number(axis), copy = False) + return obj.reindex_axis(ordered.take(ordd), axis=obj._get_axis_number(axis), copy=False) # apply the selection filters (but keep in the same order) if self.selection.filter: for axis, filt in self.selection.filter: - obj = reindex(obj, axis, filt, getattr(obj,obj._get_axis_name(axis))) + obj = reindex( + obj, axis, filt, getattr(obj, obj._get_axis_name(axis))) return obj - def create_description(self, complib = None, complevel = None, fletcher32 = False, expectedrows = None): + def create_description(self, complib=None, complevel=None, fletcher32=False, expectedrows=None): """ create the description of the table from the axes & values """ # expected rows estimate if expectedrows is None: - 
expectedrows = max(self.nrows_expected,10000) - d = dict( name = 'table', expectedrows = expectedrows ) + expectedrows = max(self.nrows_expected, 10000) + d = dict(name='table', expectedrows=expectedrows) # description from the axes & values - d['description'] = dict([ (a.cname,a.typ) for a in self.axes ]) + d['description'] = dict([(a.cname, a.typ) for a in self.axes]) if complib: if complevel is None: complevel = self.complevel or 9 - filters = _tables().Filters(complevel = complevel, - complib = complib, - fletcher32 = fletcher32 or self.fletcher32) + filters = _tables().Filters(complevel=complevel, + complib=complib, + fletcher32=fletcher32 or self.fletcher32) d['filters'] = filters elif self.filters is not None: d['filters'] = self.filters @@ -1897,7 +1941,8 @@ def create_description(self, complib = None, complevel = None, fletcher32 = Fals return d def read(self, **kwargs): - raise NotImplementedError("cannot read on an abstract table: subclasses should implement") + raise NotImplementedError( + "cannot read on an abstract table: subclasses should implement") def read_coordinates(self, where=None, **kwargs): """ select coordinates (row numbers) from a table; return the coordinates object """ @@ -1906,11 +1951,12 @@ def read_coordinates(self, where=None, **kwargs): self.validate_version(where) # infer the data kind - if not self.infer_axes(): return False + if not self.infer_axes(): + return False # create the selection - self.selection = Selection(self, where = where, **kwargs) - return Coordinates(self.selection.select_coords(), group = self.group, where = where) + self.selection = Selection(self, where=where, **kwargs) + return Coordinates(self.selection.select_coords(), group=self.group, where=where) def read_column(self, column, **kwargs): """ return a single column from the table, generally only indexables are interesting """ @@ -1919,7 +1965,8 @@ def read_column(self, column, **kwargs): self.validate_version() # infer the data kind - if not self.infer_axes(): return False + if not self.infer_axes(): + return False # find the axes for a in self.axes: @@ -1929,15 +1976,15 @@ def read_column(self, column, **kwargs): raise ValueError("column [%s] can not be extracted individually; it is not data indexable" % column) # column must be an indexable or a data column - c = getattr(self.table.cols,column) - return Categorical.from_array(a.convert(c[:], nan_rep = self.nan_rep).take_data()).levels + c = getattr(self.table.cols, column) + return Categorical.from_array(a.convert(c[:], nan_rep=self.nan_rep).take_data()).levels raise KeyError("column [%s] not found in the table" % column) def write(self, **kwargs): raise NotImplementedError("cannot write on an abstract table") - def delete(self, where = None, **kwargs): + def delete(self, where=None, **kwargs): """ support fully deleting the node in its entirety (only) - where specification must be None """ if where is None: self.handle.removeNode(self.group, recursive=True) @@ -1945,6 +1992,7 @@ def delete(self, where = None, **kwargs): raise NotImplementedError("cannot delete on an abstract table") + class WORMTable(Table): """ a write-once read-many table: this format DOES NOT ALLOW appending to a table. writing is a one-time operation the data are stored in a format @@ -1963,6 +2011,7 @@ def write(self, **kwargs): (e.g. 
a CArray) create an indexing table so that we can search""" raise NotImplementedError("WORKTable needs to implement write") + class LegacyTable(Table): """ an appendable table: allow append/query/delete operations to a (possibily) already existing appendable table this table ALLOWS @@ -1970,11 +2019,12 @@ class LegacyTable(Table): that can be easily searched """ - _indexables = [IndexCol(name = 'index', axis = 1, pos = 0), - IndexCol(name = 'column', axis = 2, pos = 1, index_kind = 'columns_kind'), - DataCol( name = 'fields', cname = 'values', kind_attr = 'fields', pos = 2) ] + _indexables = [IndexCol(name='index', axis=1, pos=0), + IndexCol(name='column', axis=2, + pos=1, index_kind='columns_kind'), + DataCol(name='fields', cname='values', kind_attr='fields', pos=2)] table_type = 'legacy' - ndim = 3 + ndim = 3 def write(self, **kwargs): raise Exception("write operations are not allowed on legacy tables!") @@ -1982,21 +2032,22 @@ def write(self, **kwargs): def read(self, where=None, columns=None, **kwargs): """ we have n indexable columns, with an arbitrary number of data axes """ + if not self.read_axes(where=where, **kwargs): + return None - if not self.read_axes(where=where, **kwargs): return None - - factors = [ Categorical.from_array(a.values) for a in self.index_axes ] - levels = [ f.levels for f in factors ] - N = [ len(f.levels) for f in factors ] - labels = [ f.labels for f in factors ] + factors = [Categorical.from_array(a.values) for a in self.index_axes] + levels = [f.levels for f in factors] + N = [len(f.levels) for f in factors] + labels = [f.labels for f in factors] # compute the key - key = factor_indexer(N[1:], labels) + key = factor_indexer(N[1:], labels) objs = [] if len(unique(key)) == len(key): - sorter, _ = algos.groupsort_indexer(com._ensure_int64(key), np.prod(N)) + sorter, _ = algos.groupsort_indexer( + com._ensure_int64(key), np.prod(N)) sorter = com._ensure_platform_int(sorter) # create the objs @@ -2005,9 +2056,10 @@ def read(self, where=None, columns=None, **kwargs): # the data need to be sorted sorted_values = c.take_data().take(sorter, axis=0) - take_labels = [ l.take(sorter) for l in labels ] - items = Index(c.values) - block = block2d_to_blocknd(sorted_values, items, tuple(N), take_labels) + take_labels = [l.take(sorter) for l in labels] + items = Index(c.values) + block = block2d_to_blocknd( + sorted_values, items, tuple(N), take_labels) # create the object mgr = BlockManager([block], [items] + levels) @@ -2015,7 +2067,8 @@ def read(self, where=None, columns=None, **kwargs): # permute if needed if self.is_transposed: - obj = obj.transpose(*tuple(Series(self.data_orientation).argsort())) + obj = obj.transpose( + *tuple(Series(self.data_orientation).argsort())) objs.append(obj) @@ -2025,8 +2078,8 @@ def read(self, where=None, columns=None, **kwargs): 'appended') # reconstruct - long_index = MultiIndex.from_arrays([ i.values for i in self.index_axes ]) - + long_index = MultiIndex.from_arrays( + [i.values for i in self.index_axes]) for c in self.values_axes: lp = DataFrame(c.data, index=long_index, columns=c.values) @@ -2050,24 +2103,28 @@ def read(self, where=None, columns=None, **kwargs): if len(objs) == 1: wp = objs[0] else: - wp = concat(objs, axis = 0, verify_integrity = True) + wp = concat(objs, axis=0, verify_integrity=True) # apply the selection filters & axis orderings wp = self.process_axes(wp, columns=columns) return wp + class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ table_type = 'legacy_frame' - obj_type = Panel + 
obj_type = Panel + def read(self, *args, **kwargs): return super(LegacyFrameTable, self).read(*args, **kwargs)['value'] + class LegacyPanelTable(LegacyTable): """ support the legacy panel table """ table_type = 'legacy_panel' - obj_type = Panel + obj_type = Panel + class AppendableTable(LegacyTable): """ suppor the new appendable table formats """ @@ -2075,8 +2132,8 @@ class AppendableTable(LegacyTable): table_type = 'appendable' def write(self, axes, obj, append=False, complib=None, - complevel=None, fletcher32=None, min_itemsize = None, chunksize = 50000, - expectedrows = None, **kwargs): + complevel=None, fletcher32=None, min_itemsize=None, chunksize=50000, + expectedrows=None, **kwargs): # create the table if it doesn't exist (or get it if it does) if not append: @@ -2084,15 +2141,16 @@ def write(self, axes, obj, append=False, complib=None, self.handle.removeNode(self.group, 'table') # create the axes - self.create_axes(axes = axes, obj = obj, validate = append, min_itemsize = min_itemsize, **kwargs) + self.create_axes(axes=axes, obj=obj, validate=append, + min_itemsize=min_itemsize, **kwargs) if 'table' not in self.group: # create the table - options = self.create_description(complib = complib, - complevel = complevel, - fletcher32 = fletcher32, - expectedrows = expectedrows) + options = self.create_description(complib=complib, + complevel=complevel, + fletcher32=fletcher32, + expectedrows=expectedrows) # set the table attributes self.set_attrs() @@ -2114,10 +2172,11 @@ def write_data(self, chunksize): """ fast writing of data: requires specific cython routines each axis shape """ # create the masks & values - masks = [] + masks = [] for a in self.values_axes: - # figure the mask: only do if we can successfully process this column, otherwise ignore the mask + # figure the mask: only do if we can successfully process this + # column, otherwise ignore the mask mask = com.isnull(a.data).all(axis=0) masks.append(mask.astype('u1')) @@ -2127,29 +2186,31 @@ def write_data(self, chunksize): m = mask & m # the arguments - indexes = [ a.cvalues for a in self.index_axes ] - search = np.array([ a.is_searchable for a in self.values_axes ]).astype('u1') - values = [ a.take_data() for a in self.values_axes ] + indexes = [a.cvalues for a in self.index_axes] + search = np.array( + [a.is_searchable for a in self.values_axes]).astype('u1') + values = [a.take_data() for a in self.values_axes] # write the chunks - rows = self.nrows_expected + rows = self.nrows_expected chunks = int(rows / chunksize) + 1 for i in xrange(chunks): - start_i = i*chunksize - end_i = min((i+1)*chunksize,rows) + start_i = i * chunksize + end_i = min((i + 1) * chunksize, rows) - self.write_data_chunk(indexes = [ a[start_i:end_i] for a in indexes ], - mask = mask[start_i:end_i], - search = search, - values = [ v[:,start_i:end_i] for v in values ]) + self.write_data_chunk( + indexes=[a[start_i:end_i] for a in indexes], + mask=mask[start_i:end_i], + search=search, + values=[v[:, start_i:end_i] for v in values]) def write_data_chunk(self, indexes, mask, search, values): # get our function try: - func = getattr(lib,"create_hdf_rows_%sd" % self.ndim) + func = getattr(lib, "create_hdf_rows_%sd" % self.ndim) args = list(indexes) - args.extend([ mask, search, values ]) + args.extend([mask, search, values]) rows = func(*args) except (Exception), detail: raise Exception("cannot create row-data -> %s" % str(detail)) @@ -2159,10 +2220,12 @@ def write_data_chunk(self, indexes, mask, search, values): self.table.append(rows) self.table.flush() 
except (Exception), detail: - import pdb; pdb.set_trace() - raise Exception("tables cannot write this data -> %s" % str(detail)) + import pdb + pdb.set_trace() + raise Exception( + "tables cannot write this data -> %s" % str(detail)) - def delete(self, where = None, **kwargs): + def delete(self, where=None, **kwargs): # delete all rows (and return the nrows) if where is None or not len(where): @@ -2171,7 +2234,8 @@ def delete(self, where = None, **kwargs): return nrows # infer the data kind - if not self.infer_axes(): return None + if not self.infer_axes(): + return None # create the selection table = self.table @@ -2179,14 +2243,14 @@ def delete(self, where = None, **kwargs): values = self.selection.select_coords() # delete the rows in reverse order - l = Series(values).order() + l = Series(values).order() ln = len(l) if ln: # construct groups of consecutive rows - diff = l.diff() - groups = list(diff[diff>1].index) + diff = l.diff() + groups = list(diff[diff > 1].index) # 1 group if not len(groups): @@ -2198,13 +2262,14 @@ def delete(self, where = None, **kwargs): # initial element if groups[0] != 0: - groups.insert(0,0) + groups.insert(0, 0) # we must remove in reverse order! pg = groups.pop() for g in reversed(groups): - rows = l.take(range(g,pg)) - table.removeRows(start = rows[rows.index[0]], stop = rows[rows.index[-1]]+1) + rows = l.take(range(g, pg)) + table.removeRows(start=rows[rows.index[0] + ], stop=rows[rows.index[-1]] + 1) pg = g self.table.flush() @@ -2212,11 +2277,12 @@ def delete(self, where = None, **kwargs): # return the number of rows removed return ln + class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ table_type = 'appendable_frame' - ndim = 2 - obj_type = DataFrame + ndim = 2 + obj_type = DataFrame @property def is_transposed(self): @@ -2230,70 +2296,72 @@ def get_object(self, obj): def read(self, where=None, columns=None, **kwargs): - if not self.read_axes(where=where, **kwargs): return None + if not self.read_axes(where=where, **kwargs): + return None - index = self.index_axes[0].values - frames = [] + index = self.index_axes[0].values + frames = [] for a in self.values_axes: cols = Index(a.values) if self.is_transposed: - values = a.cvalues - index_ = cols - cols_ = Index(index) + values = a.cvalues + index_ = cols + cols_ = Index(index) else: - values = a.cvalues.T - index_ = Index(index) - cols_ = cols - + values = a.cvalues.T + index_ = Index(index) + cols_ = cols # if we have a DataIndexableCol, its shape will only be 1 dim if values.ndim == 1: - values = values.reshape(1,values.shape[0]) + values = values.reshape(1, values.shape[0]) - block = make_block(values, cols_, cols_) - mgr = BlockManager([ block ], [ cols_, index_ ]) + block = make_block(values, cols_, cols_) + mgr = BlockManager([block], [cols_, index_]) frames.append(DataFrame(mgr)) if len(frames) == 1: df = frames[0] else: - df = concat(frames, axis = 1, verify_integrity = True) + df = concat(frames, axis=1, verify_integrity=True) # apply the selection filters & axis orderings df = self.process_axes(df, columns=columns) return df + class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ table_type = 'appendable_multiframe' - obj_type = DataFrame - ndim = 2 + obj_type = DataFrame + ndim = 2 @property def table_type_short(self): return 'appendable_multi' - def write(self, obj, data_columns = None, **kwargs): + def write(self, obj, data_columns=None, **kwargs): if data_columns is None: data_columns = [] for n in 
obj.index.names: if n not in data_columns: - data_columns.insert(0,n) + data_columns.insert(0, n) self.levels = obj.index.names - return super(AppendableMultiFrameTable, self).write(obj = obj.reset_index(), data_columns = data_columns, **kwargs) + return super(AppendableMultiFrameTable, self).write(obj=obj.reset_index(), data_columns=data_columns, **kwargs) def read(self, *args, **kwargs): df = super(AppendableMultiFrameTable, self).read(*args, **kwargs) df.set_index(self.levels, inplace=True) return df + class AppendablePanelTable(AppendableTable): """ suppor the new appendable table formats """ table_type = 'appendable_panel' - ndim = 3 - obj_type = Panel + ndim = 3 + obj_type = Panel def get_object(self, obj): """ these are written transposed """ @@ -2305,29 +2373,31 @@ def get_object(self, obj): def is_transposed(self): return self.data_orientation != tuple(range(self.ndim)) + class AppendableNDimTable(AppendablePanelTable): """ suppor the new appendable table formats """ table_type = 'appendable_ndim' - ndim = 4 - obj_type = Panel4D + ndim = 4 + obj_type = Panel4D # table maps _TABLE_MAP = { - 'appendable_frame' : AppendableFrameTable, - 'appendable_multiframe' : AppendableMultiFrameTable, - 'appendable_panel' : AppendablePanelTable, - 'appendable_ndim' : AppendableNDimTable, - 'worm' : WORMTable, - 'legacy_frame' : LegacyFrameTable, - 'legacy_panel' : LegacyPanelTable, - 'default' : AppendablePanelTable, + 'appendable_frame': AppendableFrameTable, + 'appendable_multiframe': AppendableMultiFrameTable, + 'appendable_panel': AppendablePanelTable, + 'appendable_ndim': AppendableNDimTable, + 'worm': WORMTable, + 'legacy_frame': LegacyFrameTable, + 'legacy_panel': LegacyPanelTable, + 'default': AppendablePanelTable, } -def create_table(parent, group, typ = None, **kwargs): + +def create_table(parent, group, typ=None, **kwargs): """ return a suitable Table class to operate """ - pt = getattr(group._v_attrs,'pandas_type',None) - tt = getattr(group._v_attrs,'table_type',None) or typ + pt = getattr(group._v_attrs, 'pandas_type', None) + tt = getattr(group._v_attrs, 'table_type', None) or typ # a new node if pt is None: @@ -2351,7 +2421,8 @@ def create_table(parent, group, typ = None, **kwargs): def _itemsize_string_array(arr): """ return the maximum size of elements in a strnig array """ - return max([ str_len(arr[v].ravel()).max() for v in range(arr.shape[0]) ]) + return max([str_len(arr[v].ravel()).max() for v in range(arr.shape[0])]) + def _convert_index(index): if isinstance(index, DatetimeIndex): @@ -2373,12 +2444,12 @@ def _convert_index(index): return IndexCol(converted, 'datetime64', _tables().Int64Col()) elif inferred_type == 'datetime': converted = np.array([(time.mktime(v.timetuple()) + - v.microsecond / 1E6) for v in values], - dtype=np.float64) + v.microsecond / 1E6) for v in values], + dtype=np.float64) return IndexCol(converted, 'datetime', _tables().Time64Col()) elif inferred_type == 'date': converted = np.array([time.mktime(v.timetuple()) for v in values], - dtype=np.int32) + dtype=np.int32) return IndexCol(converted, 'date', _tables().Time32Col()) elif inferred_type == 'string': # atom = _tables().ObjectAtom() @@ -2386,7 +2457,7 @@ def _convert_index(index): converted = np.array(list(values), dtype=np.str_) itemsize = converted.dtype.itemsize - return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize = itemsize) + return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize) elif inferred_type == 'unicode': atom = 
_tables().ObjectAtom() return IndexCol(np.asarray(values, dtype='O'), 'object', atom) @@ -2533,19 +2604,19 @@ class Term(object): """ - _ops = ['<=','<','>=','>','!=','==','='] - _search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops)) + _ops = ['<=', '<', '>=', '>', '!=', '==', '='] + _search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops)) - def __init__(self, field, op = None, value = None, queryables = None): + def __init__(self, field, op=None, value=None, queryables=None): self.field = None - self.op = None + self.op = None self.value = None - self.q = queryables or dict() - self.filter = None - self.condition = None + self.q = queryables or dict() + self.filter = None + self.condition = None # unpack lists/tuples in field - while(isinstance(field,(tuple,list))): + while(isinstance(field, (tuple, list))): f = field field = f[0] if len(f) > 1: @@ -2556,23 +2627,23 @@ def __init__(self, field, op = None, value = None, queryables = None): # backwards compatible if isinstance(field, dict): self.field = field.get('field') - self.op = field.get('op') or '=' + self.op = field.get('op') or '=' self.value = field.get('value') # passed a term - elif isinstance(field,Term): + elif isinstance(field, Term): self.field = field.field - self.op = field.op + self.op = field.op self.value = field.value # a string expression (or just the field) - elif isinstance(field,basestring): + elif isinstance(field, basestring): # is a term is passed s = self._search.match(field) if s is not None: self.field = s.group('field') - self.op = s.group('op') + self.op = s.group('op') self.value = s.group('value') else: @@ -2580,14 +2651,15 @@ def __init__(self, field, op = None, value = None, queryables = None): # is an op passed? if isinstance(op, basestring) and op in self._ops: - self.op = op + self.op = op self.value = value else: - self.op = '=' + self.op = '=' self.value = op else: - raise Exception("Term does not understand the supplied field [%s]" % field) + raise Exception( + "Term does not understand the supplied field [%s]" % field) # we have valid fields if self.field is None or self.op is None or self.value is None: @@ -2598,18 +2670,18 @@ def __init__(self, field, op = None, value = None, queryables = None): self.op = '=' # we have valid conditions - if self.op in ['>','>=','<','<=']: - if hasattr(self.value,'__iter__') and len(self.value) > 1: + if self.op in ['>', '>=', '<', '<=']: + if hasattr(self.value, '__iter__') and len(self.value) > 1: raise Exception("an inequality condition cannot have multiple values [%s]" % str(self)) - if not hasattr(self.value,'__iter__'): - self.value = [ self.value ] + if not hasattr(self.value, '__iter__'): + self.value = [self.value] if len(self.q): self.eval() def __str__(self): - return "field->%s,op->%s,value->%s" % (self.field,self.op,self.value) + return "field->%s,op->%s,value->%s" % (self.field, self.op, self.value) __repr__ = __str__ @@ -2636,32 +2708,34 @@ def eval(self): # convert values if we are in the table if self.is_in_table: - values = [ self.convert_value(v) for v in self.value ] + values = [self.convert_value(v) for v in self.value] else: - values = [ [v, v] for v in self.value ] + values = [[v, v] for v in self.value] # equality conditions - if self.op in ['=','!=']: + if self.op in ['=', '!=']: if self.is_in_table: # too many values to create the expression? 
                if len(values) <= 61:
-                    self.condition = "(%s)" % ' | '.join([ "(%s == %s)" % (self.field,v[0]) for v in values])
+                    self.condition = "(%s)" % ' | '.join(
+                        ["(%s == %s)" % (self.field, v[0]) for v in values])

                 # use a filter after reading
                 else:
-                    self.filter = (self.field,Index([ v[1] for v in values ]))
+                    self.filter = (self.field, Index([v[1] for v in values]))

             else:
-                self.filter = (self.field,Index([ v[1] for v in values ]))
+                self.filter = (self.field, Index([v[1] for v in values]))

         else:

             if self.is_in_table:
-                self.condition = '(%s %s %s)' % (self.field, self.op, values[0][0])
+                self.condition = '(%s %s %s)' % (
+                    self.field, self.op, values[0][0])

             else:
@@ -2670,9 +2744,9 @@ def eval(self):

     def convert_value(self, v):
         """ convert the expression that is in the term to something that is accepted by pytables """
-        if self.kind == 'datetime64' :
+        if self.kind == 'datetime64':
             return [lib.Timestamp(v).value, None]
-        elif isinstance(v, datetime) or hasattr(v,'timetuple') or self.kind == 'date':
+        elif isinstance(v, datetime) or hasattr(v, 'timetuple') or self.kind == 'date':
             return [time.mktime(v.timetuple()), None]
         elif self.kind == 'integer':
             v = int(float(v))
@@ -2686,6 +2760,7 @@ def convert_value(self, v):
             # string quoting
         return ["'" + v + "'", v]

+
 class Coordinates(object):
     """ holds a returned coordinates list, useful to select the
         same rows from different tables
@@ -2696,8 +2771,9 @@ class Coordinates(object):

     def __init__(self, values, group, where, **kwargs):
         self.values = values
-        self.group  = group
-        self.where  = where
+        self.group = group
+        self.where = where
+

 class Selection(object):
     """
@@ -2711,23 +2787,23 @@ class Selection(object):
     """

     def __init__(self, table, where=None, start=None, stop=None, **kwargs):
-        self.table       = table
-        self.where       = where
-        self.start       = start
-        self.stop        = stop
-        self.condition   = None
-        self.filter      = None
-        self.terms       = None
+        self.table = table
+        self.where = where
+        self.start = start
+        self.stop = stop
+        self.condition = None
+        self.filter = None
+        self.terms = None
         self.coordinates = None

         if isinstance(where, Coordinates):
             self.coordinates = where.values
         else:
-            self.terms       = self.generate(where)
+            self.terms = self.generate(where)

             # create the numexpr & the filter
             if self.terms:
-                conds = [ t.condition for t in self.terms if t.condition is not None ]
+                conds = [t.condition for t in self.terms if t.condition is not None]
                 if len(conds):
                     self.condition = "(%s)" % ' & '.join(conds)
                 self.filter = []
@@ -2737,20 +2813,22 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):

     def generate(self, where):
         """ where can be a : dict,list,tuple,string """
-        if where is None: return None
+        if where is None:
+            return None

-        if not isinstance(where, (list,tuple)):
-            where = [ where ]
+        if not isinstance(where, (list, tuple)):
+            where = [where]
         else:

-            # make this a list if we think that we only have a single term & no operands inside any terms
-            if not any([ isinstance(w, (list,tuple,Term)) for w in where ]):
+            # make this a list if we think that we only have a single term & no
+            # operands inside any terms
+            if not any([isinstance(w, (list, tuple, Term)) for w in where]):

-                if not any([ isinstance(w,basestring) and Term._search.match(w) for w in where ]):
-                    where = [ where ]
+                if not any([isinstance(w, basestring) and Term._search.match(w) for w in where]):
+                    where = [where]

         queryables = self.table.queryables()
-        return [ Term(c, queryables = queryables) for c in where ]
+        return [Term(c, queryables=queryables) for c in where]

     def
select(self): """ @@ -2760,7 +2838,7 @@ def select(self): return self.table.table.readWhere(self.condition, start=self.start, stop=self.stop) elif self.coordinates is not None: return self.table.table.readCoordinates(self.coordinates) - return self.table.table.read(start=self.start,stop=self.stop) + return self.table.table.read(start=self.start, stop=self.stop) def select_coords(self): """ @@ -2769,7 +2847,7 @@ def select_coords(self): if self.condition is None: return np.arange(self.table.nrows) - return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort = True) + return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True) def _get_index_factory(klass): diff --git a/pandas/io/tests/__init__.py b/pandas/io/tests/__init__.py index 8b137891791fe..e69de29bb2d1d 100644 --- a/pandas/io/tests/__init__.py +++ b/pandas/io/tests/__init__.py @@ -1 +0,0 @@ - diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index 9b5abf1c435a8..5a7e646eca0eb 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -35,6 +35,7 @@ def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth + class TestCParser(unittest.TestCase): def setUp(self): @@ -222,7 +223,7 @@ def test_numpy_string_dtype(self): def _make_reader(**kwds): return TextReader(StringIO(data), delimiter=',', header=None, - **kwds) + **kwds) reader = _make_reader(dtype='S5,i4') result = reader.read() @@ -254,6 +255,7 @@ def test_pass_dtype(self): 2,b 3,c 4,d""" + def _make_reader(**kwds): return TextReader(StringIO(data), delimiter=',', **kwds) @@ -280,6 +282,7 @@ def test_usecols(self): 4,5,6 7,8,9 10,11,12""" + def _make_reader(**kwds): return TextReader(StringIO(data), delimiter=',', **kwds) @@ -331,6 +334,5 @@ def assert_array_dicts_equal(left, right): assert(np.array_equal(v, right[k])) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) - diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 3c2a4611285ee..9396581f74326 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -23,6 +23,7 @@ from pandas.lib import Timestamp import pandas.io.date_converters as conv + class TestConverters(unittest.TestCase): def setUp(self): @@ -52,16 +53,16 @@ def test_parse_date_time(self): self.assert_('date_time' in df) self.assert_(df.date_time.ix[0] == datetime(2001, 1, 5, 10, 0, 0)) - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") date_spec = {'nominal': [1, 2], 'actual': [1, 3]} df = read_csv(StringIO(data), header=None, parse_dates=date_spec, - date_parser=conv.parse_date_time) + date_parser=conv.parse_date_time) def test_parse_date_fields(self): result = conv.parse_date_fields(self.years, self.months, self.days) @@ -122,5 +123,5 @@ def test_generic(self): if 
__name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 7169208531387..1b29a4bdd9bf2 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -35,24 +35,28 @@ from pandas.io.parsers import (ExcelFile, ExcelWriter, read_csv) + def _skip_if_no_xlrd(): try: import xlrd except ImportError: raise nose.SkipTest('xlrd not installed, skipping') + def _skip_if_no_xlwt(): try: import xlwt except ImportError: raise nose.SkipTest('xlwt not installed, skipping') + def _skip_if_no_openpyxl(): try: import openpyxl except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') + def _skip_if_no_excelsuite(): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -94,7 +98,7 @@ def test_parse_cols_int(self): pth = os.path.join(self.dirpath, 'test.xls%s' % s) xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True, - parse_cols=3) + parse_cols=3) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['A', 'B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, @@ -112,12 +116,12 @@ def test_parse_cols_list(self): pth = os.path.join(self.dirpath, 'test.xls%s' % s) xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True, - parse_cols=[0, 2, 3]) + parse_cols=[0, 2, 3]) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, - parse_dates=True, - parse_cols=[0, 2, 3]) + parse_dates=True, + parse_cols=[0, 2, 3]) tm.assert_frame_equal(df, df2) tm.assert_frame_equal(df3, df2) @@ -133,7 +137,7 @@ def test_parse_cols_str(self): xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True, - parse_cols='A:D') + parse_cols='A:D') df2 = read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['A', 'B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, @@ -143,23 +147,23 @@ def test_parse_cols_str(self): del df, df2, df3 df = xls.parse('Sheet1', index_col=0, parse_dates=True, - parse_cols='A,C,D') + parse_cols='A,C,D') df2 = read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, - parse_dates=True, - parse_cols='A,C,D') + parse_dates=True, + parse_cols='A,C,D') tm.assert_frame_equal(df, df2) tm.assert_frame_equal(df3, df2) del df, df2, df3 df = xls.parse('Sheet1', index_col=0, parse_dates=True, - parse_cols='A,C:D') + parse_cols='A,C:D') df2 = read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, - parse_dates=True, - parse_cols='A,C:D') + parse_dates=True, + parse_cols='A,C:D') tm.assert_frame_equal(df, df2) tm.assert_frame_equal(df3, df2) @@ -168,7 +172,7 @@ def test_excel_stop_iterator(self): excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.xls')) parsed = excel_data.parse('Sheet1') - expected = DataFrame([['aaaa','bbbbb']], columns=['Test', 'Test1']) + expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) tm.assert_frame_equal(parsed, expected) def test_excel_cell_error_na(self): @@ -235,7 +239,6 @@ def read_csv(self, *args, **kwds): kwds['engine'] = 'python' return read_csv(*args, **kwds) - def test_excel_roundtrip_xls(self): _skip_if_no_excelsuite() 
self._check_extension('xls') @@ -249,24 +252,24 @@ def _check_extension(self, ext): self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) # test roundtrip - self.frame.to_excel(path,'test1') + self.frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1', index_col=0) tm.assert_frame_equal(self.frame, recons) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1', index=False) reader = ExcelFile(path) recons = reader.parse('test1', index_col=None) recons.index = self.frame.index tm.assert_frame_equal(self.frame, recons) - self.frame.to_excel(path,'test1',na_rep='NA') + self.frame.to_excel(path, 'test1', na_rep='NA') reader = ExcelFile(path) recons = reader.parse('test1', index_col=0, na_values=['NA']) tm.assert_frame_equal(self.frame, recons) @@ -287,7 +290,7 @@ def test_excel_roundtrip_xlsx_mixed(self): def _check_extension_mixed(self, ext): path = '__tmp_to_excel_from_excel_mixed__.' + ext - self.mixed_frame.to_excel(path,'test1') + self.mixed_frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) @@ -329,14 +332,14 @@ def _check_extension_int64(self, ext): self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) - #Test np.int64, values read come back as float - frame = DataFrame(np.random.randint(-10,10,size=(10,2))) - frame.to_excel(path,'test1') + # Test np.int64, values read come back as float + frame = DataFrame(np.random.randint(-10, 10, size=(10, 2))) + frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1').astype(np.int64) tm.assert_frame_equal(frame, recons) @@ -351,20 +354,19 @@ def test_excel_roundtrip_xlsx_bool(self): _skip_if_no_excelsuite() self._check_extension_bool('xlsx') - def _check_extension_bool(self, ext): path = '__tmp_to_excel_from_excel_bool__.' 
+ ext self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) - #Test reading/writing np.bool8, roundtrip only works for xlsx - frame = (DataFrame(np.random.randn(10,2)) >= 0) - frame.to_excel(path,'test1') + # Test reading/writing np.bool8, roundtrip only works for xlsx + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1').astype(np.bool8) tm.assert_frame_equal(frame, recons) @@ -379,26 +381,25 @@ def test_excel_roundtrip_xlsx_sheets(self): _skip_if_no_excelsuite() self._check_extension_sheets('xlsx') - def _check_extension_sheets(self, ext): path = '__tmp_to_excel_from_excel_sheets__.' + ext self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(path) - self.frame.to_excel(writer,'test1') - self.tsframe.to_excel(writer,'test2') + self.frame.to_excel(writer, 'test1') + self.tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) + recons = reader.parse('test1', index_col=0) tm.assert_frame_equal(self.frame, recons) - recons = reader.parse('test2',index_col=0) + recons = reader.parse('test2', index_col=0) tm.assert_frame_equal(self.tsframe, recons) np.testing.assert_equal(2, len(reader.sheet_names)) np.testing.assert_equal('test1', reader.sheet_names[0]) @@ -419,10 +420,10 @@ def _check_extension_colaliases(self, ext): self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) @@ -448,27 +449,28 @@ def _check_extension_indexlabels(self, ext): try: self.frame['A'][:5] = nan - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) # test index_label - frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame = (DataFrame(np.random.randn(10, 2)) >= 0) frame.to_excel(path, 'test1', index_label=['test']) reader = ExcelFile(path) recons = reader.parse('test1', index_col=0).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) - frame = (DataFrame(np.random.randn(10,2)) >= 0) - frame.to_excel(path, 'test1', index_label=['test', 'dummy', 'dummy2']) + 
frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel( + path, 'test1', index_label=['test', 'dummy', 'dummy2']) reader = ExcelFile(path) recons = reader.parse('test1', index_col=0).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) - frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame = (DataFrame(np.random.randn(10, 2)) >= 0) frame.to_excel(path, 'test1', index_label='test') reader = ExcelFile(path) recons = reader.parse('test1', index_col=0).astype(np.int64) @@ -477,12 +479,13 @@ def _check_extension_indexlabels(self, ext): finally: os.remove(path) - #test index_labels in same row as column names + # test index_labels in same row as column names path = '%s.xls' % tm.rands(10) try: self.frame.to_excel(path, 'test1', cols=['A', 'B', 'C', 'D'], index=False) - #take 'A' and 'B' as indexes (they are in same row as cols 'C', 'D') + # take 'A' and 'B' as indexes (they are in same row as cols 'C', + # 'D') df = self.frame.copy() df = df.set_index(['A', 'B']) @@ -530,10 +533,10 @@ def test_excel_roundtrip_datetime(self): def test_excel_roundtrip_bool(self): _skip_if_no_openpyxl() - #Test roundtrip np.bool8, does not seem to work for xls + # Test roundtrip np.bool8, does not seem to work for xls path = '__tmp_excel_roundtrip_bool__.xlsx' - frame = (DataFrame(np.random.randn(10,2)) >= 0) - frame.to_excel(path,'test1') + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1') tm.assert_frame_equal(frame, recons) @@ -563,11 +566,11 @@ def test_to_excel_multiindex_xlsx(self): self._check_excel_multiindex('xlsx') def _check_excel_multiindex(self, ext): - path = '__tmp_to_excel_multiindex__' + ext + '__.'+ext + path = '__tmp_to_excel_multiindex__' + ext + '__.' 
+ ext
         frame = self.frame

         old_index = frame.index
-        arrays = np.arange(len(old_index)*2).reshape(2,-1)
+        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
         new_index = MultiIndex.from_arrays(arrays,
                                            names=['first', 'second'])
         frame.index = new_index

@@ -577,10 +580,10 @@ def _check_excel_multiindex(self, ext):

         # round trip
         frame.to_excel(path, 'test1')
         reader = ExcelFile(path)
-        df = reader.parse('test1', index_col=[0,1], parse_dates=False)
+        df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
         tm.assert_frame_equal(frame, df)
         self.assertEqual(frame.index.names, df.index.names)
-        self.frame.index = old_index # needed if setUP becomes a classmethod
+        self.frame.index = old_index  # needed if setUp becomes a classmethod
         os.remove(path)

@@ -602,9 +605,9 @@ def _check_excel_multiindex_dates(self, ext):
         new_index = [old_index, np.arange(len(old_index))]
         tsframe.index = MultiIndex.from_arrays(new_index)

-        tsframe.to_excel(path, 'test1', index_label = ['time','foo'])
+        tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
         reader = ExcelFile(path)
-        recons = reader.parse('test1', index_col=[0,1])
+        recons = reader.parse('test1', index_col=[0, 1])
         tm.assert_frame_equal(tsframe, recons)

         # infer index
@@ -613,7 +616,7 @@ def _check_excel_multiindex_dates(self, ext):
         recons = reader.parse('test1')
         tm.assert_frame_equal(tsframe, recons)

-        self.tsframe.index = old_index # needed if setUP becomes classmethod
+        self.tsframe.index = old_index  # needed if setUp becomes classmethod
         os.remove(path)

@@ -670,11 +673,11 @@ def test_to_excel_styleconverter(self):
             raise nose.SkipTest

         hstyle = {"font": {"bold": True},
-                   "borders": {"top": "thin",
-                               "right": "thin",
-                               "bottom": "thin",
-                               "left": "thin"},
-                   "alignment": {"horizontal": "center"}}
+                  "borders": {"top": "thin",
+                              "right": "thin",
+                              "bottom": "thin",
+                              "left": "thin"},
+                  "alignment": {"horizontal": "center"}}
         xls_style = CellStyleConverter.to_xls(hstyle)
         self.assertTrue(xls_style.font.bold)
         self.assertEquals(xlwt.Borders.THIN, xls_style.borders.top)
@@ -726,7 +729,6 @@ def test_to_excel_styleconverter(self):

    #     filename = '__tmp_to_excel_header_styling_xls__.xls'
    #     pdf.to_excel(filename, 'test1')
-
    #     wbk = xlrd.open_workbook(filename,
    #                              formatting_info=True)
    #     self.assertEquals(["test1"], wbk.sheet_names())
@@ -744,12 +746,8 @@ def test_to_excel_styleconverter(self):
    #     self.assertEquals(1, cell_xf.border.bottom_line_style)
    #     self.assertEquals(1, cell_xf.border.left_line_style)
    #     self.assertEquals(2, cell_xf.alignment.hor_align)
-
    #     os.remove(filename)
-
-
    # def test_to_excel_header_styling_xlsx(self):
-
    #     import StringIO
    #     s = StringIO.StringIO(
    #     """Date,ticker,type,value
@@ -768,24 +766,19 @@ def test_to_excel_styleconverter(self):
    #     df = read_csv(s, parse_dates=["Date"])
    #     pdf = df.pivot_table(values="value", rows=["ticker"],
    #                          cols=["Date", "type"])
-
    #     try:
    #         import openpyxl
    #         from openpyxl.cell import get_column_letter
    #     except ImportError:
    #         raise nose.SkipTest
-
    #     if openpyxl.__version__ < '1.6.1':
    #         raise nose.SkipTest
-
    #     # test xlsx_styling
    #     filename = '__tmp_to_excel_header_styling_xlsx__.xlsx'
    #     pdf.to_excel(filename, 'test1')
-
    #     wbk = openpyxl.load_workbook(filename)
    #     self.assertEquals(["test1"], wbk.get_sheet_names())
    #     ws = wbk.get_sheet_by_name('test1')
-
    #     xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))]
    #     xlsaddrs += ["A%s" % i for i in range(1, 6)]
    #     xlsaddrs += ["B1", "D1", "F1"]
@@ -802,13 +795,10 @@ def test_to_excel_styleconverter(self):
    #             cell.style.borders.left.border_style)
    #         self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER,
    #                           cell.style.alignment.horizontal)
-
    #     mergedcells_addrs = ["C1", "E1", "G1"]
    #     for maddr in mergedcells_addrs:
    #         self.assertTrue(ws.cell(maddr).merged)
-
    #     os.remove(filename)
-
     def test_excel_010_hemstring(self):
         try:
             import xlwt
@@ -819,51 +809,53 @@ def test_excel_010_hemstring(self):
         from pandas.util.testing import makeCustomDataframe as mkdf
         # ensure limited functionality in 0.10
         # override of #2370 until sorted out in 0.11
-        def roundtrip(df,header=True,parser_hdr=0):
-            path = '__tmp__test_xl_010_%s__.xls' % np.random.randint(1,10000)
-            df.to_excel(path,header=header)
-            xf = pd.ExcelFile(path)
-            try:
-                res = xf.parse(xf.sheet_names[0],header=parser_hdr)
-                return res
-            finally:
-                os.remove(path)
+
+        def roundtrip(df, header=True, parser_hdr=0):
+            path = '__tmp__test_xl_010_%s__.xls' % np.random.randint(1, 10000)
+            df.to_excel(path, header=header)
+            xf = pd.ExcelFile(path)
+            try:
+                res = xf.parse(xf.sheet_names[0], header=parser_hdr)
+                return res
+            finally:
+                os.remove(path)

         nrows = 5
         ncols = 3

-        for i in range(1,4): # row multindex upto nlevel=3
-            for j in range(1,4): # col ""
-                df = mkdf(nrows,ncols,r_idx_nlevels=i,c_idx_nlevels=j)
+        for i in range(1, 4):  # row multiindex up to nlevel=3
+            for j in range(1, 4):  # col ""
+                df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
                 res = roundtrip(df)
                 # shape
-                self.assertEqual(res.shape,(nrows,ncols+i))
+                self.assertEqual(res.shape, (nrows, ncols + i))
                 # no nans
                 for r in range(len(res.index)):
                     for c in range(len(res.columns)):
-                        self.assertTrue(res.ix[r,c] is not np.nan)
+                        self.assertTrue(res.ix[r, c] is not np.nan)

-        for i in range(1,4): # row multindex upto nlevel=3
-            for j in range(1,4): # col ""
-                df = mkdf(nrows,ncols,r_idx_nlevels=i,c_idx_nlevels=j)
-                res = roundtrip(df,False)
+        for i in range(1, 4):  # row multiindex up to nlevel=3
+            for j in range(1, 4):  # col ""
+                df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
+                res = roundtrip(df, False)
                 # shape
-                self.assertEqual(res.shape,(nrows-1,ncols+i)) # first row taken as columns
+                self.assertEqual(res.shape, (
+                    nrows - 1, ncols + i))  # first row taken as columns
                 # no nans
                 for r in range(len(res.index)):
                     for c in range(len(res.columns)):
-                        self.assertTrue(res.ix[r,c] is not np.nan)
+                        self.assertTrue(res.ix[r, c] is not np.nan)

         res = roundtrip(DataFrame([0]))
-        self.assertEqual(res.shape,(1,1))
-        self.assertTrue(res.ix[0,0] is not np.nan)
+        self.assertEqual(res.shape, (1, 1))
+        self.assertTrue(res.ix[0, 0] is not np.nan)

-        res = roundtrip(DataFrame([0]),False,None)
-        self.assertEqual(res.shape,(1,2))
-        self.assertTrue(res.ix[0,0] is not np.nan)
+        res = roundtrip(DataFrame([0]), False, None)
+        self.assertEqual(res.shape, (1, 2))
+        self.assertTrue(res.ix[0, 0] is not np.nan)

 if __name__ == '__main__':
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/io/tests/test_ga.py b/pandas/io/tests/test_ga.py
index 1f7f1de67e7a1..651430b655ab4 100644
--- a/pandas/io/tests/test_ga.py
+++ b/pandas/io/tests/test_ga.py
@@ -8,6 +8,7 @@
 from pandas.util.testing import network, assert_frame_equal
 from numpy.testing.decorators import slow

+
 class TestGoogle(unittest.TestCase):

     _multiprocess_can_split_ = True
@@ -32,10 +33,10 @@ def test_getdata(self):
             df = reader.get_data(
                 metrics=['avgTimeOnSite', 'visitors', 'newVisits',
                          'pageviewsPerVisit'],
-                start_date = start_date,
-                end_date = end_date,
+                
start_date=start_date, + end_date=end_date, dimensions=['date', 'hour'], - parse_dates={'ts' : ['date', 'hour']}) + parse_dates={'ts': ['date', 'hour']}) assert isinstance(df, DataFrame) assert isinstance(df.index, pd.DatetimeIndex) @@ -54,7 +55,7 @@ def test_getdata(self): start_date=start_date, end_date=end_date, dimensions=['date', 'hour'], - parse_dates={'ts' : ['date', 'hour']}) + parse_dates={'ts': ['date', 'hour']}) assert_frame_equal(df, df2) @@ -112,5 +113,5 @@ def test_iterator(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 73fd10c21c33d..297b8e291c681 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -71,35 +71,35 @@ def test_empty_string(self): g,7,seven """ df = self.read_csv(StringIO(data)) - xp = DataFrame({'One' : ['a', 'b', np.nan, 'd', 'e', np.nan, 'g'], - 'Two' : [1,2,3,4,5,6,7], - 'Three' : ['one', 'two', 'three', np.nan, 'five', - np.nan, 'seven']}) + xp = DataFrame({'One': ['a', 'b', np.nan, 'd', 'e', np.nan, 'g'], + 'Two': [1, 2, 3, 4, 5, 6, 7], + 'Three': ['one', 'two', 'three', np.nan, 'five', + np.nan, 'seven']}) tm.assert_frame_equal(xp.reindex(columns=df.columns), df) df = self.read_csv(StringIO(data), na_values={'One': [], 'Three': []}, - keep_default_na=False) - xp = DataFrame({'One' : ['a', 'b', '', 'd', 'e', 'nan', 'g'], - 'Two' : [1,2,3,4,5,6,7], - 'Three' : ['one', 'two', 'three', 'nan', 'five', - '', 'seven']}) + keep_default_na=False) + xp = DataFrame({'One': ['a', 'b', '', 'd', 'e', 'nan', 'g'], + 'Two': [1, 2, 3, 4, 5, 6, 7], + 'Three': ['one', 'two', 'three', 'nan', 'five', + '', 'seven']}) tm.assert_frame_equal(xp.reindex(columns=df.columns), df) - df = self.read_csv(StringIO(data), na_values=['a'], keep_default_na=False) - xp = DataFrame({'One' : [np.nan, 'b', '', 'd', 'e', 'nan', 'g'], - 'Two' : [1, 2, 3, 4, 5, 6, 7], - 'Three' : ['one', 'two', 'three', 'nan', 'five', '', - 'seven']}) + df = self.read_csv( + StringIO(data), na_values=['a'], keep_default_na=False) + xp = DataFrame({'One': [np.nan, 'b', '', 'd', 'e', 'nan', 'g'], + 'Two': [1, 2, 3, 4, 5, 6, 7], + 'Three': ['one', 'two', 'three', 'nan', 'five', '', + 'seven']}) tm.assert_frame_equal(xp.reindex(columns=df.columns), df) df = self.read_csv(StringIO(data), na_values={'One': [], 'Three': []}) - xp = DataFrame({'One' : ['a', 'b', np.nan, 'd', 'e', np.nan, 'g'], - 'Two' : [1,2,3,4,5,6,7], - 'Three' : ['one', 'two', 'three', np.nan, 'five', - np.nan, 'seven']}) + xp = DataFrame({'One': ['a', 'b', np.nan, 'd', 'e', np.nan, 'g'], + 'Two': [1, 2, 3, 4, 5, 6, 7], + 'Three': ['one', 'two', 'three', np.nan, 'five', + np.nan, 'seven']}) tm.assert_frame_equal(xp.reindex(columns=df.columns), df) - def test_read_csv(self): if not py3compat.PY3: if 'win' in sys.platform: @@ -150,13 +150,12 @@ def test_squeeze(self): b,2 c,3 """ - expected = Series([1,2,3], ['a', 'b', 'c']) + expected = Series([1, 2, 3], ['a', 'b', 'c']) result = self.read_table(StringIO(data), sep=',', index_col=0, - header=None, squeeze=True) + header=None, squeeze=True) self.assert_(isinstance(result, Series)) assert_series_equal(result, expected) - def test_inf_parsing(self): data = """\ ,A @@ -175,14 +174,15 @@ def test_multiple_date_col(self): KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD,19990127, 23:00:00, 22:56:00, 
-0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ + def func(*date_cols): return lib.try_parse_dates(parsers._concat_date_cols(date_cols)) df = self.read_csv(StringIO(data), header=None, date_parser=func, prefix='X', - parse_dates={'nominal' : [1, 2], - 'actual' : [1,3]}) + parse_dates={'nominal': [1, 2], + 'actual': [1, 3]}) self.assert_('nominal' in df) self.assert_('actual' in df) self.assert_('X1' not in df) @@ -194,8 +194,8 @@ def func(*date_cols): df = self.read_csv(StringIO(data), header=None, date_parser=func, - parse_dates={'nominal' : [1, 2], - 'actual' : [1,3]}, + parse_dates={'nominal': [1, 2], + 'actual': [1, 3]}, keep_date_col=True) self.assert_('nominal' in df) self.assert_('actual' in df) @@ -214,7 +214,7 @@ def func(*date_cols): """ df = read_csv(StringIO(data), header=None, prefix='X', - parse_dates=[[1, 2], [1,3]]) + parse_dates=[[1, 2], [1, 3]]) self.assert_('X1_X2' in df) self.assert_('X1_X3' in df) @@ -226,7 +226,7 @@ def func(*date_cols): self.assert_(df.ix[0, 'X1_X2'] == d) df = read_csv(StringIO(data), header=None, - parse_dates=[[1, 2], [1,3]], keep_date_col=True) + parse_dates=[[1, 2], [1, 3]], keep_date_col=True) self.assert_('1_2' in df) self.assert_('1_3' in df) @@ -247,12 +247,12 @@ def func(*date_cols): self.assert_(df.index[0] == d) def test_multiple_date_cols_int_cast(self): - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") date_spec = {'nominal': [1, 2], 'actual': [1, 3]} import pandas.io.date_converters as conv @@ -301,7 +301,7 @@ def test_multiple_date_cols_with_header(self): def test_multiple_date_col_name_collision(self): self.assertRaises(ValueError, self.read_csv, StringIO(self.ts_data), - parse_dates={'ID' : [1, 2]}) + parse_dates={'ID': [1, 2]}) data = """\ date_NominalTime,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir @@ -326,7 +326,7 @@ def test_index_col_named(self): h = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" data = h + no_header - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() rs = self.read_csv(StringIO(data), index_col='ID') xp = self.read_csv(StringIO(data), header=0).set_index('ID') tm.assert_frame_equal(rs, xp) @@ -340,8 +340,8 @@ def test_index_col_named(self): 9,10,11,12,foo """ names = ['a', 'b', 'c', 'd', 'message'] - xp = DataFrame({'a' : [1, 5, 9], 'b' : [2, 6, 10], 'c' : [3, 7, 11], - 'd' : [4, 8, 12]}, + xp = DataFrame({'a': [1, 5, 9], 'b': [2, 6, 10], 'c': [3, 7, 11], + 'd': [4, 8, 12]}, index=Index(['hello', 'world', 'foo'], name='message')) rs = self.read_csv(StringIO(data), names=names, index_col=['message']) tm.assert_frame_equal(xp, rs) @@ -352,13 +352,13 @@ def test_index_col_named(self): self.assert_(xp.index.name == rs.index.name) def test_converter_index_col_bug(self): - #1835 + # 1835 data = "A;B\n1;2\n3;4" rs = self.read_csv(StringIO(data), sep=';', index_col='A', - converters={'A' : lambda x: x}) + converters={'A': lambda x: x}) - xp = DataFrame({'B' : [2, 4]}, index=Index([1, 3], name='A')) + xp = DataFrame({'B': [2, 4]}, 
index=Index([1, 3], name='A')) tm.assert_frame_equal(rs, xp) self.assert_(rs.index.name == xp.index.name) @@ -376,12 +376,13 @@ def test_malformed(self): """ try: - df = self.read_table(StringIO(data), sep=',', header=1, comment='#') + df = self.read_table( + StringIO(data), sep=',', header=1, comment='#') self.assert_(False) except Exception, inst: self.assert_('Expected 3 fields in line 4, saw 5' in str(inst)) - #skip_footer + # skip_footer data = """ignore A,B,C 1,2,3 # comment @@ -391,8 +392,9 @@ def test_malformed(self): """ try: - df = self.read_table(StringIO(data), sep=',', header=1, comment='#', - skip_footer=1) + df = self.read_table( + StringIO(data), sep=',', header=1, comment='#', + skip_footer=1) self.assert_(False) except Exception, inst: self.assert_('Expected 3 fields in line 4, saw 5' in str(inst)) @@ -408,14 +410,13 @@ def test_malformed(self): """ try: it = self.read_table(StringIO(data), sep=',', - header=1, comment='#', iterator=True, chunksize=1, - skiprows=[2]) + header=1, comment='#', iterator=True, chunksize=1, + skiprows=[2]) df = it.read(5) self.assert_(False) except Exception, inst: self.assert_('Expected 3 fields in line 6, saw 5' in str(inst)) - # middle chunk data = """ignore A,B,C @@ -433,7 +434,6 @@ def test_malformed(self): except Exception, inst: self.assert_('Expected 3 fields in line 6, saw 5' in str(inst)) - # last chunk data = """ignore A,B,C @@ -445,8 +445,8 @@ def test_malformed(self): """ try: it = self.read_table(StringIO(data), sep=',', - header=1, comment='#', iterator=True, chunksize=1, - skiprows=[2]) + header=1, comment='#', iterator=True, chunksize=1, + skiprows=[2]) df = it.read(1) it.read() self.assert_(False) @@ -482,11 +482,11 @@ def test_custom_na_values(self): assert_almost_equal(df.values, expected) df2 = self.read_table(StringIO(data), sep=',', na_values=['baz'], - skiprows=[1]) + skiprows=[1]) assert_almost_equal(df2.values, expected) df3 = self.read_table(StringIO(data), sep=',', na_values='baz', - skiprows=[1]) + skiprows=[1]) assert_almost_equal(df3.values, expected) def test_skiprows_bug(self): @@ -507,7 +507,7 @@ def test_skiprows_bug(self): data2 = self.read_csv(StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True) - expected = DataFrame(np.arange(1., 10.).reshape((3,3)), + expected = DataFrame(np.arange(1., 10.).reshape((3, 3)), columns=[1, 2, 3], index=[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]) @@ -533,9 +533,9 @@ def test_unnamed_columns(self): 6,7,8,9,10 11,12,13,14,15 """ - expected = [[1,2,3,4,5.], - [6,7,8,9,10], - [11,12,13,14,15]] + expected = [[1, 2, 3, 4, 5.], + [6, 7, 8, 9, 10], + [11, 12, 13, 14, 15]] df = self.read_table(StringIO(data), sep=',') assert_almost_equal(df.values, expected) self.assert_(np.array_equal(df.columns, @@ -594,7 +594,8 @@ def test_parse_dates_implicit_first_col(self): """ df = self.read_csv(StringIO(data), parse_dates=True) expected = self.read_csv(StringIO(data), index_col=0, parse_dates=True) - self.assert_(isinstance(df.index[0], (datetime, np.datetime64, Timestamp))) + self.assert_( + isinstance(df.index[0], (datetime, np.datetime64, Timestamp))) tm.assert_frame_equal(df, expected) def test_parse_dates_string(self): @@ -603,7 +604,8 @@ def test_parse_dates_string(self): 20090102,b,3,4 20090103,c,4,5 """ - rs = self.read_csv(StringIO(data), index_col='date', parse_dates='date') + rs = self.read_csv( + StringIO(data), index_col='date', parse_dates='date') idx = date_range('1/1/2009', periods=3).asobject idx.name = 'date' xp = DataFrame({'A': 
['a', 'b', 'c'], @@ -619,18 +621,18 @@ def test_yy_format(self): """ rs = self.read_csv(StringIO(data), index_col=0, parse_dates=[['date', 'time']]) - idx = DatetimeIndex([datetime(2009,1,31,0,10,0), - datetime(2009,2,28,10,20,0), - datetime(2009,3,31,8,30,0)]).asobject + idx = DatetimeIndex([datetime(2009, 1, 31, 0, 10, 0), + datetime(2009, 2, 28, 10, 20, 0), + datetime(2009, 3, 31, 8, 30, 0)]).asobject idx.name = 'date' xp = DataFrame({'B': [1, 3, 5], 'C': [2, 4, 6]}, idx) tm.assert_frame_equal(rs, xp) rs = self.read_csv(StringIO(data), index_col=0, parse_dates=[[0, 1]]) - idx = DatetimeIndex([datetime(2009,1,31,0,10,0), - datetime(2009,2,28,10,20,0), - datetime(2009,3,31,8,30,0)]).asobject + idx = DatetimeIndex([datetime(2009, 1, 31, 0, 10, 0), + datetime(2009, 2, 28, 10, 20, 0), + datetime(2009, 3, 31, 8, 30, 0)]).asobject idx.name = 'date' xp = DataFrame({'B': [1, 3, 5], 'C': [2, 4, 6]}, idx) tm.assert_frame_equal(rs, xp) @@ -653,11 +655,11 @@ def test_parse_dates_column_list(self): expected['aux_date'] = map(Timestamp, expected['aux_date']) self.assert_(isinstance(expected['aux_date'][0], datetime)) - df = self.read_csv(StringIO(data), sep=";", index_col = range(4), + df = self.read_csv(StringIO(data), sep=";", index_col=range(4), parse_dates=[0, 5], dayfirst=True) tm.assert_frame_equal(df, expected) - df = self.read_csv(StringIO(data), sep=";", index_col = range(4), + df = self.read_csv(StringIO(data), sep=";", index_col=range(4), parse_dates=['date', 'aux_date'], dayfirst=True) tm.assert_frame_equal(df, expected) @@ -672,9 +674,9 @@ def test_no_header(self): names = ['foo', 'bar', 'baz', 'quux', 'panda'] df2 = self.read_table(StringIO(data), sep=',', names=names) - expected = [[1,2,3,4,5.], - [6,7,8,9,10], - [11,12,13,14,15]] + expected = [[1, 2, 3, 4, 5.], + [6, 7, 8, 9, 10], + [11, 12, 13, 14, 15]] assert_almost_equal(df.values, expected) assert_almost_equal(df.values, df2.values) @@ -694,9 +696,9 @@ def test_header_with_index_col(self): self.assertEqual(names, ['A', 'B', 'C']) - values = [[1,2,3],[4,5,6],[7,8,9]] - expected = DataFrame(values, index=['foo','bar','baz'], - columns=['A','B','C']) + values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + expected = DataFrame(values, index=['foo', 'bar', 'baz'], + columns=['A', 'B', 'C']) tm.assert_frame_equal(df, expected) def test_read_csv_dataframe(self): @@ -746,7 +748,7 @@ def test_read_table_duplicate_index(self): result = self.read_csv(StringIO(data), index_col=0) expected = self.read_csv(StringIO(data)).set_index('index', - verify_integrity=False) + verify_integrity=False) tm.assert_frame_equal(result, expected) def test_read_table_duplicate_index_implicit(self): @@ -834,7 +836,8 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[2], df[4:]) def test_read_chunksize_named(self): - reader = self.read_csv(StringIO(self.data1), index_col='index', chunksize=2) + reader = self.read_csv( + StringIO(self.data1), index_col='index', chunksize=2) df = self.read_csv(StringIO(self.data1), index_col='index') chunks = list(reader) @@ -845,11 +848,12 @@ def test_read_chunksize_named(self): def test_read_text_list(self): data = """A,B,C\nfoo,1,2,3\nbar,4,5,6""" - as_list = [['A','B','C'],['foo','1','2','3'],['bar','4','5','6']] + as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar', + '4', '5', '6']] df = self.read_csv(StringIO(data), index_col=0) parser = TextParser(as_list, index_col=0, chunksize=2) - chunk = parser.read(None) + chunk = parser.read(None) tm.assert_frame_equal(chunk, df) @@ -886,7 +890,7 @@ def test_iterator(self): 
self.assertRaises(ValueError, reader.read, 3) treader = self.read_table(StringIO(self.data1), sep=',', index_col=0, - iterator=True) + iterator=True) self.assert_(isinstance(treader, TextFileReader)) def test_header_not_first_line(self): @@ -1017,7 +1021,6 @@ def test_skip_footer(self): tm.assert_frame_equal(result, expected) - def test_no_unnamed_index(self): data = """ id c0 c1 c2 0 1 0 a b @@ -1035,8 +1038,8 @@ def test_converters(self): """ from dateutil import parser - result = self.read_csv(StringIO(data), converters={'D' : parser.parse}) - result2 = self.read_csv(StringIO(data), converters={3 : parser.parse}) + result = self.read_csv(StringIO(data), converters={'D': parser.parse}) + result2 = self.read_csv(StringIO(data), converters={3: parser.parse}) expected = self.read_csv(StringIO(data)) expected['D'] = expected['D'].map(parser.parse) @@ -1047,13 +1050,13 @@ def test_converters(self): # produce integer converter = lambda x: int(x.split('/')[2]) - result = self.read_csv(StringIO(data), converters={'D' : converter}) + result = self.read_csv(StringIO(data), converters={'D': converter}) expected = self.read_csv(StringIO(data)) expected['D'] = expected['D'].map(converter) tm.assert_frame_equal(result, expected) def test_converters_no_implicit_conv(self): - #GH2184 + # GH2184 data = """000102,1.2,A\n001245,2,B""" f = lambda x: x.strip() converter = {0: f} @@ -1065,9 +1068,9 @@ def test_converters_euro_decimal_format(self): 1;1521,1541;187101,9543;ABC;poi;4,738797819 2;121,12;14897,76;DEF;uyt;0,377320872 3;878,158;108013,434;GHI;rez;2,735694704""" - f = lambda x : float(x.replace(",", ".")) - converter = {'Number1':f,'Number2':f, 'Number3':f} - df2 = self.read_csv(StringIO(data), sep=';',converters=converter) + f = lambda x: float(x.replace(",", ".")) + converter = {'Number1': f, 'Number2': f, 'Number3': f} + df2 = self.read_csv(StringIO(data), sep=';', converters=converter) self.assert_(df2['Number1'].dtype == float) self.assert_(df2['Number2'].dtype == float) self.assert_(df2['Number3'].dtype == float) @@ -1078,9 +1081,9 @@ def test_converter_return_string_bug(self): 1;1521,1541;187101,9543;ABC;poi;4,738797819 2;121,12;14897,76;DEF;uyt;0,377320872 3;878,158;108013,434;GHI;rez;2,735694704""" - f = lambda x : float(x.replace(",", ".")) - converter = {'Number1':f,'Number2':f, 'Number3':f} - df2 = self.read_csv(StringIO(data), sep=';',converters=converter) + f = lambda x: float(x.replace(",", ".")) + converter = {'Number1': f, 'Number2': f, 'Number3': f} + df2 = self.read_csv(StringIO(data), sep=';', converters=converter) self.assert_(df2['Number1'].dtype == float) def test_read_table_buglet_4x_multiindex(self): @@ -1101,8 +1104,8 @@ def test_read_csv_parse_simple_list(self): foo bar""" df = read_csv(StringIO(text), header=None) - expected = DataFrame({0 : ['foo', 'bar baz', 'qux foo', - 'foo', 'bar']}) + expected = DataFrame({0: ['foo', 'bar baz', 'qux foo', + 'foo', 'bar']}) tm.assert_frame_equal(df, expected) def test_parse_dates_custom_euroformat(self): @@ -1120,7 +1123,7 @@ def test_parse_dates_custom_euroformat(self): exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1), datetime(2010, 2, 2)], name='time') - expected = DataFrame({'Q' : [1, 1, 1], 'NTU' : [2, np.nan, 2]}, + expected = DataFrame({'Q': [1, 1, 1], 'NTU': [2, np.nan, 2]}, index=exp_index, columns=['Q', 'NTU']) tm.assert_frame_equal(df, expected) @@ -1139,7 +1142,7 @@ def test_na_value_dict(self): bar,foo,foo""" df = self.read_csv(StringIO(data), - na_values={'A': ['foo'], 'B': ['bar']}) + na_values={'A': 
['foo'], 'B': ['bar']}) expected = DataFrame({'A': [np.nan, 'bar', np.nan, 'bar'], 'B': [np.nan, 'foo', np.nan, 'foo'], 'C': [np.nan, 'foo', np.nan, 'foo']}) @@ -1177,7 +1180,7 @@ def test_url(self): localtable = os.path.join(dirpath, 'salary.table') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) - #TODO: ftp testing + # TODO: ftp testing except urllib2.URLError: try: @@ -1199,7 +1202,7 @@ def test_file(self): local_table = self.read_table(localtable) try: - url_table = self.read_table('file://localhost/'+localtable) + url_table = self.read_table('file://localhost/' + localtable) except urllib2.URLError: # fails on some systems raise nose.SkipTest @@ -1217,7 +1220,7 @@ def test_parse_tz_aware(self): self.assert_(stamp.minute == 39) try: self.assert_(result.index.tz is pytz.utc) - except AssertionError: # hello Yaroslav + except AssertionError: # hello Yaroslav arr = result.index.to_pydatetime() result = tools.to_datetime(arr, utc=True)[0] self.assert_(stamp.minute == result.minute) @@ -1246,8 +1249,10 @@ def test_multiple_date_cols_index(self): tm.assert_frame_equal(df3, df) def test_multiple_date_cols_chunked(self): - df = self.read_csv(StringIO(self.ts_data), parse_dates={'nominal': [1,2]}, index_col='nominal') - reader = self.read_csv(StringIO(self.ts_data), parse_dates={'nominal': [1,2]}, index_col='nominal', chunksize=2) + df = self.read_csv(StringIO(self.ts_data), parse_dates={ + 'nominal': [1, 2]}, index_col='nominal') + reader = self.read_csv(StringIO(self.ts_data), parse_dates={'nominal': + [1, 2]}, index_col='nominal', chunksize=2) chunks = list(reader) @@ -1259,20 +1264,20 @@ def test_multiple_date_cols_chunked(self): def test_multiple_date_col_named_components(self): xp = self.read_csv(StringIO(self.ts_data), - parse_dates={'nominal': [1,2]}, + parse_dates={'nominal': [1, 2]}, index_col='nominal') - colspec = {'nominal' : ['date', 'nominalTime']} + colspec = {'nominal': ['date', 'nominalTime']} df = self.read_csv(StringIO(self.ts_data), parse_dates=colspec, index_col='nominal') tm.assert_frame_equal(df, xp) def test_multiple_date_col_multiple_index(self): df = self.read_csv(StringIO(self.ts_data), - parse_dates={'nominal' : [1, 2]}, + parse_dates={'nominal': [1, 2]}, index_col=['nominal', 'ID']) xp = self.read_csv(StringIO(self.ts_data), - parse_dates={'nominal' : [1, 2]}) + parse_dates={'nominal': [1, 2]}) tm.assert_frame_equal(xp.set_index(['nominal', 'ID']), df) @@ -1299,7 +1304,7 @@ def test_bool_na_values(self): result = self.read_csv(StringIO(data)) expected = DataFrame({'A': np.array([True, nan, False], dtype=object), 'B': np.array([False, True, nan], dtype=object), - 'C': [True, False, True]}) + 'C': [True, False, True]}) tm.assert_frame_equal(result, expected) @@ -1316,7 +1321,6 @@ def test_missing_trailing_delimiters(self): result = self.read_csv(StringIO(data)) self.assertTrue(result['D'].isnull()[1:].all()) - def test_skipinitialspace(self): s = ('"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' '1.00361, 1.12551, 330.65659, 0355626618.16711, 73.48821, ' @@ -1357,11 +1361,11 @@ def test_utf16_bom_skiprows(self): if py3compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper - s =TextIOWrapper(s, encoding='utf-8') + s = TextIOWrapper(s, encoding='utf-8') result = self.read_csv(path, encoding=enc, skiprows=2, sep=sep) - expected = self.read_csv(s,encoding='utf-8', skiprows=2, + expected = self.read_csv(s, encoding='utf-8', skiprows=2, sep=sep) tm.assert_frame_equal(result, expected) @@ 
-1383,7 +1387,6 @@ def test_utf16_example(self): result = self.read_table(buf, encoding='utf-16') self.assertEquals(len(result), 50) - def test_converters_corner_with_nas(self): # skip aberration observed on Win64 Python 3.2.2 if hash(np.int64(-1)) != -2: @@ -1397,48 +1400,51 @@ def test_converters_corner_with_nas(self): 4,6-12,2""" def convert_days(x): - x = x.strip() - if not x: return np.nan + x = x.strip() + if not x: + return np.nan - is_plus = x.endswith('+') - if is_plus: - x = int(x[:-1]) + 1 - else: - x = int(x) - return x + is_plus = x.endswith('+') + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + return x def convert_days_sentinel(x): - x = x.strip() - if not x: return np.nan + x = x.strip() + if not x: + return np.nan - is_plus = x.endswith('+') - if is_plus: - x = int(x[:-1]) + 1 - else: - x = int(x) - return x + is_plus = x.endswith('+') + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + return x def convert_score(x): - x = x.strip() - if not x: return np.nan - if x.find('-')>0: - valmin, valmax = map(int, x.split('-')) - val = 0.5*(valmin + valmax) - else: - val = float(x) + x = x.strip() + if not x: + return np.nan + if x.find('-') > 0: + valmin, valmax = map(int, x.split('-')) + val = 0.5 * (valmin + valmax) + else: + val = float(x) - return val + return val fh = StringIO.StringIO(csv) - result = self.read_csv(fh, converters={'score':convert_score, - 'days':convert_days}, - na_values=['',None]) + result = self.read_csv(fh, converters={'score': convert_score, + 'days': convert_days}, + na_values=['', None]) self.assert_(pd.isnull(result['days'][1])) fh = StringIO.StringIO(csv) - result2 = self.read_csv(fh, converters={'score':convert_score, - 'days':convert_days_sentinel}, - na_values=['',None]) + result2 = self.read_csv(fh, converters={'score': convert_score, + 'days': convert_days_sentinel}, + na_values=['', None]) tm.assert_frame_equal(result, result2) def test_unicode_encoding(self): @@ -1467,7 +1473,8 @@ def test_trailing_delimiters(self): tm.assert_frame_equal(result, expected) def test_escapechar(self): - # http://stackoverflow.com/questions/13824840/feature-request-for-pandas-read-csv + # http://stackoverflow.com/questions/13824840/feature-request-for- + # pandas-read-csv data = '''SEARCH_TERM,ACTUAL_URL "bra tv bord","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" "tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" @@ -1540,7 +1547,6 @@ def test_sniff_delimiter(self): sep=None, skiprows=2) tm.assert_frame_equal(data, data3) - text = u"""ignore this ignore this too index|A|B|C @@ -1553,11 +1559,10 @@ def test_sniff_delimiter(self): if py3compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper - s =TextIOWrapper(s, encoding='utf-8') - + s = TextIOWrapper(s, encoding='utf-8') data4 = self.read_csv(s, index_col=0, sep=None, skiprows=2, - encoding='utf-8') + encoding='utf-8') tm.assert_frame_equal(data, data4) def test_regex_separator(self): @@ -1568,7 +1573,7 @@ def test_regex_separator(self): """ df = self.read_table(StringIO(data), sep='\s+') expected = self.read_csv(StringIO(re.sub('[ ]+', ',', data)), - index_col=0) + index_col=0) self.assert_(expected.index.name is None) tm.assert_frame_equal(df, expected) @@ -1579,7 +1584,7 @@ def test_1000_fwf(self): """ expected = [[1, 2334., 5], [10, 13, 10]] - df = read_fwf(StringIO(data), 
colspecs=[(0,3),(3,11),(12,16)], + df = read_fwf(StringIO(data), colspecs=[(0, 3), (3, 11), (12, 16)], thousands=',') assert_almost_equal(df.values, expected) @@ -1590,7 +1595,7 @@ def test_comment_fwf(self): """ expected = [[1, 2., 4], [5, np.nan, 10.]] - df = read_fwf(StringIO(data), colspecs=[(0,3),(4,9),(9,25)], + df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)], comment='#') assert_almost_equal(df.values, expected) @@ -1635,13 +1640,13 @@ def test_fwf(self): 201161~~~~413.836124~~~184.375703~~~11916.8 201162~~~~502.953953~~~173.237159~~~12468.3 """ - df = read_fwf(StringIO(data3), colspecs=colspecs, delimiter='~', header=None) + df = read_fwf( + StringIO(data3), colspecs=colspecs, delimiter='~', header=None) tm.assert_frame_equal(df, expected) self.assertRaises(ValueError, read_fwf, StringIO(data3), colspecs=colspecs, widths=[6, 10, 10, 7]) - def test_verbose_import(self): text = """a,b,c,d one,1,2,3 @@ -1750,6 +1755,7 @@ def test_parse_dates_empty_string(self): result = pd.read_csv(s, parse_dates=["Date"], na_filter=False) self.assertTrue(result['Date'].isnull()[1]) + class TestCParserLowMemory(ParserTests, unittest.TestCase): def read_csv(self, *args, **kwds): @@ -1849,7 +1855,8 @@ def test_pure_python_failover(self): def test_decompression(self): try: - import gzip, bz2 + import gzip + import bz2 except ImportError: raise nose.SkipTest @@ -1894,7 +1901,8 @@ def test_decompression(self): def test_decompression_regex_sep(self): try: - import gzip, bz2 + import gzip + import bz2 except ImportError: raise nose.SkipTest @@ -2066,5 +2074,5 @@ def curpath(): if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 86b183c7bfc76..1c8ec54b65487 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -23,10 +23,11 @@ from distutils.version import LooseVersion _default_compressor = LooseVersion(tables.__version__) >= '2.2' \ - and 'blosc' or 'zlib' + and 'blosc' or 'zlib' _multiprocess_can_split_ = False + class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' @@ -61,7 +62,8 @@ def test_keys(self): self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) - self.assert_(set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) + self.assert_(set( + self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) def test_repr(self): repr(self.store) @@ -101,14 +103,15 @@ def test_versioning(self): self.store.remove('df2') self.store.append('df2', df) - # this is an error because its table_type is appendable, but no version info + # this is an error because its table_type is appendable, but no version + # info self.store.get_node('df2')._v_attrs.pandas_version = None - self.assertRaises(Exception, self.store.select,'df2') + self.assertRaises(Exception, self.store.select, 'df2') def test_meta(self): raise nose.SkipTest('no meta') - meta = { 'foo' : [ 'I love pandas ' ] } + meta = {'foo': ['I love pandas ']} s = tm.makeTimeSeries() s.meta = meta self.store['a'] = s @@ -124,12 +127,12 @@ def test_meta(self): self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) results = self.store['df1'] - #self.assert_(getattr(results,'meta',None) == meta) + # self.assert_(getattr(results,'meta',None) == meta) # no meta df = tm.makeDataFrame() 
self.store['b'] = df
-        self.assert_(hasattr(self.store['b'],'meta') == False)
+        self.assert_(hasattr(self.store['b'], 'meta') is False)

     def test_reopen_handle(self):
         self.store['a'] = tm.makeTimeSeries()
@@ -164,11 +167,13 @@ def test_put(self):
         self.store.put('c', df[:10], table=True)

         # not OK, not a table
-        self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True)
+        self.assertRaises(
+            ValueError, self.store.put, 'b', df[10:], append=True)

         # node does not currently exist, test _is_table_type returns False in
         # this case
-        self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True)
+        self.assertRaises(
+            ValueError, self.store.put, 'f', df[10:], append=True)

         # OK
         self.store.put('c', df[10:], append=True)
@@ -179,9 +184,10 @@ def test_put_string_index(self):

-        index = Index([ "I am a very long string index: %s" % i for i in range(20) ])
-        s = Series(np.arange(20), index = index)
-        df = DataFrame({ 'A' : s, 'B' : s })
+        index = Index(
+            ["I am a very long string index: %s" % i for i in range(20)])
+        s = Series(np.arange(20), index=index)
+        df = DataFrame({'A': s, 'B': s})
         self.store['a'] = s
         tm.assert_series_equal(self.store['a'], s)

@@ -190,16 +196,15 @@ def test_put_string_index(self):
         tm.assert_frame_equal(self.store['b'], df)

         # mixed length
-        index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + [ "I am a very long string index: %s" % i for i in range(20) ])
-        s = Series(np.arange(21), index = index)
-        df = DataFrame({ 'A' : s, 'B' : s })
+        index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)])
+        s = Series(np.arange(21), index=index)
+        df = DataFrame({'A': s, 'B': s})
         self.store['a'] = s
         tm.assert_series_equal(self.store['a'], s)

         self.store['b'] = df
         tm.assert_frame_equal(self.store['b'], df)

-
     def test_put_compression(self):
         df = tm.makeTimeDataFrame()

@@ -251,25 +256,27 @@ def test_append(self):
         self.store.append('/df3 foo', df[10:])
         tm.assert_frame_equal(self.store['df3 foo'], df)
         warnings.filterwarnings('always', category=tables.NaturalNameWarning)
-        
+
         # panel
         wp = tm.makePanel()
         self.store.remove('wp1')
-        self.store.append('wp1', wp.ix[:,:10,:])
-        self.store.append('wp1', wp.ix[:,10:,:])
+        self.store.append('wp1', wp.ix[:, :10, :])
+        self.store.append('wp1', wp.ix[:, 10:, :])
         tm.assert_panel_equal(self.store['wp1'], wp)

         # ndim
         p4d = tm.makePanel4D()
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:])
-        self.store.append('p4d', p4d.ix[:,:,10:,:])
+        self.store.append('p4d', p4d.ix[:, :, :10, :])
+        self.store.append('p4d', p4d.ix[:, :, 10:, :])
         tm.assert_panel4d_equal(self.store['p4d'], p4d)

         # test using axis labels
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:], axes=['items','major_axis','minor_axis'])
-        self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['items','major_axis','minor_axis'])
+        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=[
+            'items', 'major_axis', 'minor_axis'])
+        self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
+            'items', 'major_axis', 'minor_axis'])
         tm.assert_panel4d_equal(self.store['p4d'], p4d)

         # test using different number of items on each axis
@@ -277,32 +284,33 @@ def test_append(self):
         p4d2['l4'] = p4d['l1']
         p4d2['l5'] = p4d['l1']
         self.store.remove('p4d2')
-        self.store.append('p4d2', p4d2, axes=['items','major_axis','minor_axis'])
+        self.store.append(
+            'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis'])
         tm.assert_panel4d_equal(self.store['p4d2'], p4d2)

         # test using different order of items on the non-index axes
         self.store.remove('wp1')
-        wp_append1 = wp.ix[:,:10,:]
+        wp_append1 = wp.ix[:, :10, :]
         self.store.append('wp1', wp_append1)
-        wp_append2 = wp.ix[:,10:,:].reindex(items = wp.items[::-1])
-        self.store.append('wp1', wp_append2)
+        wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1])
+        self.store.append('wp1', wp_append2)
         tm.assert_panel_equal(self.store['wp1'], wp)
-        
+
         # dtype issues - mixed type in a single object column
-        df = DataFrame(data=[[1,2],[0,1],[1,2],[0,0]])
+        df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
         df['mixed_column'] = 'testing'
-        df.ix[2,'mixed_column'] = np.nan
+        df.ix[2, 'mixed_column'] = np.nan
         self.store.remove('df')
         self.store.append('df', df)
-        tm.assert_frame_equal(self.store['df'],df)
+        tm.assert_frame_equal(self.store['df'], df)

     def test_append_frame_column_oriented(self):

         # column oriented
         df = tm.makeTimeDataFrame()
         self.store.remove('df1')
-        self.store.append('df1', df.ix[:,:2], axes = ['columns'])
-        self.store.append('df1', df.ix[:,2:])
+        self.store.append('df1', df.ix[:, :2], axes=['columns'])
+        self.store.append('df1', df.ix[:, 2:])
         tm.assert_frame_equal(self.store['df1'], df)

         result = self.store.select('df1', 'columns=A')
@@ -310,11 +318,13 @@ def test_append_frame_column_oriented(self):
         tm.assert_frame_equal(expected, result)

         # this isn't supported
-        self.assertRaises(Exception, self.store.select, 'df1', ('columns=A', Term('index','>',df.index[4])))
+        self.assertRaises(Exception, self.store.select, 'df1', (
+            'columns=A', Term('index', '>', df.index[4])))

         # selection on the non-indexable
-        result = self.store.select('df1', ('columns=A', Term('index','=',df.index[0:4])))
-        expected = df.reindex(columns=['A'],index=df.index[0:4])
+        result = self.store.select(
+            'df1', ('columns=A', Term('index', '=', df.index[0:4])))
+        expected = df.reindex(columns=['A'], index=df.index[0:4])
         tm.assert_frame_equal(expected, result)

     def test_ndim_indexables(self):
@@ -323,84 +333,92 @@ def test_ndim_indexables(self):

         p4d = tm.makePanel4D()

         def check_indexers(key, indexers):
-            for i,idx in enumerate(indexers):
-                self.assert_(getattr(getattr(self.store.root,key).table.description,idx)._v_pos == i)
+            for i, idx in enumerate(indexers):
+                self.assert_(getattr(getattr(
+                    self.store.root, key).table.description, idx)._v_pos == i)

         # append then change (will take existing schema)
-        indexers = ['items','major_axis','minor_axis']
-        
+        indexers = ['items', 'major_axis', 'minor_axis']
+
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers)
-        self.store.append('p4d', p4d.ix[:,:,10:,:])
-        tm.assert_panel4d_equal(self.store.select('p4d'),p4d)
-        check_indexers('p4d',indexers)
+        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+        self.store.append('p4d', p4d.ix[:, :, 10:, :])
+        tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
+        check_indexers('p4d', indexers)

         # same as above, but try to append with different axes
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers)
-        self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['labels','items','major_axis'])
-        tm.assert_panel4d_equal(self.store.select('p4d'),p4d)
-        check_indexers('p4d',indexers)
+        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+        self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
+            'labels', 'items', 'major_axis'])
+        tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
+        check_indexers('p4d', indexers)

         # pass incorrect number of axes
         self.store.remove('p4d')
-        self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[:,:,:10,:], axes=['major_axis','minor_axis'])
+        self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[
+            :, :, :10, :], axes=['major_axis', 'minor_axis'])

         # different than default indexables #1
-        indexers = ['labels','major_axis','minor_axis']
+        indexers = ['labels', 'major_axis', 'minor_axis']
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers)
-        self.store.append('p4d', p4d.ix[:,:,10:,:])
+        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+        self.store.append('p4d', p4d.ix[:, :, 10:, :])
         tm.assert_panel4d_equal(self.store['p4d'], p4d)
-        check_indexers('p4d',indexers)
-        
+        check_indexers('p4d', indexers)
+
         # different than default indexables #2
-        indexers = ['major_axis','labels','minor_axis']
+        indexers = ['major_axis', 'labels', 'minor_axis']
         self.store.remove('p4d')
-        self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers)
-        self.store.append('p4d', p4d.ix[:,:,10:,:])
+        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+        self.store.append('p4d', p4d.ix[:, :, 10:, :])
         tm.assert_panel4d_equal(self.store['p4d'], p4d)
-        check_indexers('p4d',indexers)
+        check_indexers('p4d', indexers)

         # partial selection
-        result = self.store.select('p4d',['labels=l1'])
-        expected = p4d.reindex(labels = ['l1'])
+        result = self.store.select('p4d', ['labels=l1'])
+        expected = p4d.reindex(labels=['l1'])
         tm.assert_panel4d_equal(result, expected)

         # partial selection2
-        result = self.store.select('p4d',[Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
-        expected = p4d.reindex(labels = ['l1'], items = ['ItemA'], minor_axis = ['B'])
+        result = self.store.select('p4d', [Term(
+            'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
+        expected = p4d.reindex(
+            labels=['l1'], items=['ItemA'], minor_axis=['B'])
         tm.assert_panel4d_equal(result, expected)

         # non-existent partial selection
-        result = self.store.select('p4d',[Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
-        expected = p4d.reindex(labels = ['l1'], items = [], minor_axis = ['B'])
+        result = self.store.select('p4d', [Term(
+            'labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
+        expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B'])
         tm.assert_panel4d_equal(result, expected)

     def test_append_with_strings(self):
         wp = tm.makePanel()
-        wp2 = wp.rename_axis(dict([ (x,"%s_extra" % x) for x in wp.minor_axis ]), axis = 2)
+        wp2 = wp.rename_axis(
+            dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2)

-        def check_col(key,name,size):
-            self.assert_(getattr(self.store.get_table(key).table.description,name).itemsize == size)
+        def check_col(key, name, size):
+            self.assert_(getattr(self.store.get_table(
+                key).table.description, name).itemsize == size)

-        self.store.append('s1', wp, min_itemsize = 20)
+        self.store.append('s1', wp, min_itemsize=20)
         self.store.append('s1', wp2)
-        expected = concat([ wp, wp2], axis = 2)
-        expected = expected.reindex(minor_axis = sorted(expected.minor_axis))
+        expected = concat([wp, wp2], axis=2)
+        expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
         tm.assert_panel_equal(self.store['s1'], expected)
-        check_col('s1','minor_axis',20)
+        check_col('s1', 'minor_axis', 20)

         # test dict format
-        self.store.append('s2', wp, min_itemsize = { 'minor_axis' : 20 })
+        self.store.append('s2', wp, min_itemsize={'minor_axis': 20})
         self.store.append('s2', wp2)
-        expected = concat([ wp, wp2], axis = 2)
-        expected = expected.reindex(minor_axis = sorted(expected.minor_axis))
+        expected = concat([wp, wp2], axis=2)
+        expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
         tm.assert_panel_equal(self.store['s2'], expected)
-        check_col('s2','minor_axis',20)
+        check_col('s2', 'minor_axis', 20)

         # apply the wrong field (similar to #1)
-        self.store.append('s3', wp, min_itemsize = { 'major_axis' : 20 })
+        self.store.append('s3', wp, min_itemsize={'major_axis': 20})
         self.assertRaises(Exception, self.store.append, 's3')

         # test truncation of bigger strings
@@ -408,99 +426,104 @@ def check_col(key,name,size):
         self.assertRaises(Exception, self.store.append, 's4', wp2)

         # avoid truncation on elements
-        df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']])
-        self.store.append('df_big',df)
+        df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
+        self.store.append('df_big', df)
         tm.assert_frame_equal(self.store.select('df_big'), df)
-        check_col('df_big','values_block_1',15)
+        check_col('df_big', 'values_block_1', 15)

         # appending smaller string ok
-        df2 = DataFrame([[124,'asdqy'], [346,'dggnhefbdfb']])
-        self.store.append('df_big',df2)
-        expected = concat([ df, df2 ])
+        df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
+        self.store.append('df_big', df2)
+        expected = concat([df, df2])
         tm.assert_frame_equal(self.store.select('df_big'), expected)
-        check_col('df_big','values_block_1',15)
+        check_col('df_big', 'values_block_1', 15)

         # avoid truncation on elements
-        df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']])
-        self.store.append('df_big2',df, min_itemsize = { 'values' : 50 })
+        df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
+        self.store.append('df_big2', df, min_itemsize={'values': 50})
         tm.assert_frame_equal(self.store.select('df_big2'), df)
-        check_col('df_big2','values_block_1',50)
+        check_col('df_big2', 'values_block_1', 50)

         # bigger string on next append
-        self.store.append('df_new',df)
-        df_new = DataFrame([[124,'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
-        self.assertRaises(Exception, self.store.append, 'df_new',df_new)
+        self.store.append('df_new', df)
+        df_new = DataFrame(
+            [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
+        self.assertRaises(Exception, self.store.append, 'df_new', df_new)

         # with nans
         self.store.remove('df')
         df = tm.makeTimeDataFrame()
         df['string'] = 'foo'
-        df.ix[1:4,'string'] = np.nan
+        df.ix[1:4, 'string'] = np.nan
         df['string2'] = 'bar'
-        df.ix[4:8,'string2'] = np.nan
+        df.ix[4:8, 'string2'] = np.nan
         df['string3'] = 'bah'
-        df.ix[1:,'string3'] = np.nan
-        self.store.append('df',df)
+        df.ix[1:, 'string3'] = np.nan
+        self.store.append('df', df)
         result = self.store.select('df')
-        tm.assert_frame_equal(result,df)
-
+        tm.assert_frame_equal(result, df)

     def test_append_with_data_columns(self):

         df = tm.makeTimeDataFrame()
         self.store.remove('df')
-        self.store.append('df', df[:2], data_columns = ['B'])
+        self.store.append('df', df[:2], data_columns=['B'])
         self.store.append('df', df[2:])
         tm.assert_frame_equal(self.store['df'], df)

         # check that we have indices created
-        assert(self.store.handle.root.df.table.cols.index.is_indexed == True)
-        assert(self.store.handle.root.df.table.cols.B.is_indexed == True)
+        assert(self.store.handle.root.df.table.cols.index.is_indexed is True)
+        assert(self.store.handle.root.df.table.cols.B.is_indexed is True)

         # data column searching
-        result = self.store.select('df', [ Term('B>0') ])
-        expected = df[df.B>0]
+        result = self.store.select('df', [Term('B>0')])
+        expected = df[df.B > 0]
         tm.assert_frame_equal(result, expected)

         # data column searching (with an indexable 
and a data_columns) - result = self.store.select('df', [ Term('B>0'), Term('index','>',df.index[3]) ]) + result = self.store.select( + 'df', [Term('B>0'), Term('index', '>', df.index[3])]) df_new = df.reindex(index=df.index[4:]) - expected = df_new[df_new.B>0] + expected = df_new[df_new.B > 0] tm.assert_frame_equal(result, expected) - + # data column selection with a string data_column df_new = df.copy() df_new['string'] = 'foo' df_new['string'][1:4] = np.nan df_new['string'][5:6] = 'bar' self.store.remove('df') - self.store.append('df', df_new, data_columns = ['string']) - result = self.store.select('df', [ Term('string', '=', 'foo') ]) + self.store.append('df', df_new, data_columns=['string']) + result = self.store.select('df', [Term('string', '=', 'foo')]) expected = df_new[df_new.string == 'foo'] tm.assert_frame_equal(result, expected) # using min_itemsize and a data column - def check_col(key,name,size): - self.assert_(getattr(self.store.get_table(key).table.description,name).itemsize == size) + def check_col(key, name, size): + self.assert_(getattr(self.store.get_table( + key).table.description, name).itemsize == size) self.store.remove('df') - self.store.append('df', df_new, data_columns = ['string'], min_itemsize = { 'string' : 30 }) - check_col('df','string',30) + self.store.append('df', df_new, data_columns=['string'], + min_itemsize={'string': 30}) + check_col('df', 'string', 30) self.store.remove('df') - self.store.append('df', df_new, data_columns = ['string'], min_itemsize = 30) - check_col('df','string',30) + self.store.append( + 'df', df_new, data_columns=['string'], min_itemsize=30) + check_col('df', 'string', 30) self.store.remove('df') - self.store.append('df', df_new, data_columns = ['string'], min_itemsize = { 'values' : 30 }) - check_col('df','string',30) + self.store.append('df', df_new, data_columns=['string'], + min_itemsize={'values': 30}) + check_col('df', 'string', 30) df_new['string2'] = 'foobarbah' df_new['string_block1'] = 'foobarbah1' df_new['string_block2'] = 'foobarbah2' self.store.remove('df') - self.store.append('df', df_new, data_columns = ['string','string2'], min_itemsize = { 'string' : 30, 'string2' : 40, 'values' : 50 }) - check_col('df','string',30) - check_col('df','string2',40) - check_col('df','values_block_1',50) + self.store.append('df', df_new, data_columns=['string', 'string2'], min_itemsize={'string': 30, 'string2': 40, 'values': 50}) + check_col('df', 'string', 30) + check_col('df', 'string2', 40) + check_col('df', 'values_block_1', 50) # multiple data columns df_new = df.copy() @@ -511,87 +534,96 @@ def check_col(key,name,size): df_new['string2'][2:5] = np.nan df_new['string2'][7:8] = 'bar' self.store.remove('df') - self.store.append('df', df_new, data_columns = ['A','B','string','string2']) - result = self.store.select('df', [ Term('string', '=', 'foo'), Term('string2=foo'), Term('A>0'), Term('B<0') ]) - expected = df_new[(df_new.string == 'foo') & (df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] + self.store.append( + 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) + result = self.store.select('df', [Term('string', '=', 'foo'), Term( + 'string2=foo'), Term('A>0'), Term('B<0')]) + expected = df_new[(df_new.string == 'foo') & ( + df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] tm.assert_frame_equal(result, expected) # yield an empty frame - result = self.store.select('df', [ Term('string', '=', 'foo'), Term('string2=bar'), Term('A>0'), Term('B<0') ]) - expected = df_new[(df_new.string == 'foo') & 
(df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)] + result = self.store.select('df', [Term('string', '=', 'foo'), Term( + 'string2=bar'), Term('A>0'), Term('B<0')]) + expected = df_new[(df_new.string == 'foo') & ( + df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)] tm.assert_frame_equal(result, expected) # doc example df_dc = df.copy() df_dc['string'] = 'foo' - df_dc.ix[4:6,'string'] = np.nan - df_dc.ix[7:9,'string'] = 'bar' + df_dc.ix[4:6, 'string'] = np.nan + df_dc.ix[7:9, 'string'] = 'bar' df_dc['string2'] = 'cool' df_dc['datetime'] = Timestamp('20010102') df_dc = df_dc.convert_objects() - df_dc.ix[3:5,['A','B','datetime']] = np.nan + df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan self.store.remove('df_dc') - self.store.append('df_dc', df_dc, data_columns = ['B','C','string','string2','datetime']) - result = self.store.select('df_dc',[ Term('B>0') ]) + self.store.append('df_dc', df_dc, data_columns=['B', 'C', + 'string', 'string2', 'datetime']) + result = self.store.select('df_dc', [Term('B>0')]) expected = df_dc[df_dc.B > 0] tm.assert_frame_equal(result, expected) - result = self.store.select('df_dc',[ 'B > 0', 'C > 0', 'string == foo' ]) - expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == 'foo')] + result = self.store.select( + 'df_dc', ['B > 0', 'C > 0', 'string == foo']) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & ( + df_dc.string == 'foo')] tm.assert_frame_equal(result, expected) def test_create_table_index(self): - def col(t,column): - return getattr(self.store.get_table(t).table.cols,column) + def col(t, column): + return getattr(self.store.get_table(t).table.cols, column) # index=False wp = tm.makePanel() self.store.append('p5', wp, index=False) - self.store.create_table_index('p5', columns = ['major_axis']) - assert(col('p5','major_axis').is_indexed == True) - assert(col('p5','minor_axis').is_indexed == False) + self.store.create_table_index('p5', columns=['major_axis']) + assert(col('p5', 'major_axis').is_indexed is True) + assert(col('p5', 'minor_axis').is_indexed is False) # index=True self.store.append('p5i', wp, index=True) - assert(col('p5i','major_axis').is_indexed == True) - assert(col('p5i','minor_axis').is_indexed == True) + assert(col('p5i', 'major_axis').is_indexed is True) + assert(col('p5i', 'minor_axis').is_indexed is True) # default optlevels self.store.get_table('p5').create_index() - assert(col('p5','major_axis').index.optlevel == 6) - assert(col('p5','minor_axis').index.kind == 'medium') + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') # let's change the indexing scheme self.store.create_table_index('p5') - assert(col('p5','major_axis').index.optlevel == 6) - assert(col('p5','minor_axis').index.kind == 'medium') + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') self.store.create_table_index('p5', optlevel=9) - assert(col('p5','major_axis').index.optlevel == 9) - assert(col('p5','minor_axis').index.kind == 'medium') + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'medium') self.store.create_table_index('p5', kind='full') - assert(col('p5','major_axis').index.optlevel == 9) - assert(col('p5','minor_axis').index.kind == 'full') + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'full') self.store.create_table_index('p5', optlevel=1, kind='light') - assert(col('p5','major_axis').index.optlevel == 
1) - assert(col('p5','minor_axis').index.kind == 'light') - + assert(col('p5', 'major_axis').index.optlevel == 1) + assert(col('p5', 'minor_axis').index.kind == 'light') + # data columns df = tm.makeTimeDataFrame() df['string'] = 'foo' df['string2'] = 'bar' - self.store.append('f', df, data_columns=['string','string2']) - assert(col('f','index').is_indexed == True) - assert(col('f','string').is_indexed == True) - assert(col('f','string2').is_indexed == True) + self.store.append('f', df, data_columns=['string', 'string2']) + assert(col('f', 'index').is_indexed is True) + assert(col('f', 'string').is_indexed is True) + assert(col('f', 'string2').is_indexed is True) # specify index=columns - self.store.append('f2', df, index=['string'], data_columns=['string','string2']) - assert(col('f2','index').is_indexed == False) - assert(col('f2','string').is_indexed == True) - assert(col('f2','string2').is_indexed == False) + self.store.append( + 'f2', df, index=['string'], data_columns=['string', 'string2']) + assert(col('f2', 'index').is_indexed is False) + assert(col('f2', 'string').is_indexed is True) + assert(col('f2', 'string2').is_indexed is False) # try to index a non-table self.store.put('f2', df) @@ -605,13 +637,13 @@ def col(t,column): # test out some versions original = tables.__version__ - for v in ['2.2','2.2b']: + for v in ['2.2', '2.2b']: pytables._table_mod = None pytables._table_supports_index = False tables.__version__ = v self.assertRaises(Exception, self.store.create_table_index, 'f') - for v in ['2.3.1','2.3.1b','2.4dev','2.4',original]: + for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]: pytables._table_mod = None pytables._table_supports_index = False tables.__version__ = v @@ -619,13 +651,13 @@ def col(t,column): pytables._table_mod = None pytables._table_supports_index = False tables.__version__ = original - def test_big_table_frame(self): raise nose.SkipTest('no big table frame') # create and write a big table - df = DataFrame(np.random.randn(2000*100, 100), index = range(2000*100), columns = [ 'E%03d' % i for i in xrange(100) ]) + df = DataFrame(np.random.randn(2000 * 100, 100), index=range( + 2000 * 100), columns=['E%03d' % i for i in xrange(100)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x @@ -633,45 +665,46 @@ def test_big_table_frame(self): x = time.time() try: store = HDFStore(self.scratchpath) - store.append('df',df) + store.append('df', df) rows = store.root.df.table.nrows recons = store.select('df') finally: store.close() os.remove(self.scratchpath) - print "\nbig_table frame [%s] -> %5.2f" % (rows,time.time()-x) - + print "\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x) def test_big_table2_frame(self): - # this is a really big table: 2.5m rows x 300 float columns, 20 string columns + # this is a really big table: 2.5m rows x 300 float columns, 20 string + # columns raise nose.SkipTest('no big table2 frame') # create and write a big table print "\nbig_table2 start" import time start_time = time.time() - df = DataFrame(np.random.randn(2.5*1000*1000, 300), index = range(int(2.5*1000*1000)), columns = [ 'E%03d' % i for i in xrange(300) ]) + df = DataFrame(np.random.randn(2.5 * 1000 * 1000, 300), index=range(int( + 2.5 * 1000 * 1000)), columns=['E%03d' % i for i in xrange(300)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x - print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index),time.time()-start_time) + print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() 
- start_time) fn = 'big_table2.h5' try: - + def f(chunksize): - store = HDFStore(fn,mode = 'w') - store.append('df',df,chunksize=chunksize) + store = HDFStore(fn, mode='w') + store.append('df', df, chunksize=chunksize) r = store.root.df.table.nrows store.close() return r - for c in [ 10000, 50000, 100000, 250000 ]: + for c in [10000, 50000, 100000, 250000]: start_time = time.time() print "big_table2 frame [chunk->%s]" % c rows = f(c) - print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows,c,time.time()-start_time) + print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time) finally: os.remove(fn) @@ -680,10 +713,11 @@ def test_big_table_panel(self): raise nose.SkipTest('no big table panel') # create and write a big table - wp = Panel(np.random.randn(20, 1000, 1000), items= [ 'Item%03d' % i for i in xrange(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(1000) ]) + wp = Panel( + np.random.randn(20, 1000, 1000), items=['Item%03d' % i for i in xrange(20)], + major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in xrange(1000)]) - wp.ix[:,100:200,300:400] = np.nan + wp.ix[:, 100:200, 300:400] = np.nan for x in range(100): wp['String%03d'] = 'string%03d' % x @@ -692,14 +726,14 @@ def test_big_table_panel(self): x = time.time() try: store = HDFStore(self.scratchpath) - store.prof_append('wp',wp) + store.prof_append('wp', wp) rows = store.root.wp.table.nrows recons = store.select('wp') finally: store.close() os.remove(self.scratchpath) - print "\nbig_table panel [%s] -> %5.2f" % (rows,time.time()-x) + print "\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x) def test_append_diff_item_order(self): raise nose.SkipTest('append diff item order') @@ -721,18 +755,18 @@ def test_append_hierarchical(self): df = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) - self.store.append('mi',df) + self.store.append('mi', df) result = self.store.select('mi') tm.assert_frame_equal(result, df) def test_append_misc(self): df = tm.makeDataFrame() - self.store.append('df',df,chunksize=1) + self.store.append('df', df, chunksize=1) result = self.store.select('df') tm.assert_frame_equal(result, df) - self.store.append('df1',df,expectedrows=10) + self.store.append('df1', df, expectedrows=10) result = self.store.select('df1') tm.assert_frame_equal(result, df) @@ -746,11 +780,11 @@ def test_table_index_incompatible_dtypes(self): table=True, append=True) def test_table_values_dtypes_roundtrip(self): - df1 = DataFrame({'a': [1, 2, 3]}, dtype = 'f8') + df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8') self.store.append('df1', df1) assert df1.dtypes == self.store['df1'].dtypes - df2 = DataFrame({'a': [1, 2, 3]}, dtype = 'i8') + df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8') self.store.append('df2', df2) assert df2.dtypes == self.store['df2'].dtypes @@ -770,9 +804,9 @@ def test_table_mixed_dtypes(self): df['int2'] = 2 df['timestamp1'] = Timestamp('20010102') df['timestamp2'] = Timestamp('20010103') - df['datetime1'] = datetime.datetime(2001,1,2,0,0) - df['datetime2'] = datetime.datetime(2001,1,3,0,0) - df.ix[3:6,['obj1']] = np.nan + df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) + df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) + df.ix[3:6, ['obj1']] = np.nan df = df.consolidate().convert_objects() self.store.append('df1_mixed', df) @@ -800,22 +834,23 @@ def test_table_mixed_dtypes(self): wp['int1'] = 1 wp['int2'] = 2 wp = wp.consolidate() - + 
self.store.append('p4d_mixed', wp) tm.assert_panel4d_equal(self.store.select('p4d_mixed'), wp) def test_unimplemented_dtypes_table_columns(self): #### currently not supported dtypes #### - for n,f in [ ('unicode',u'\u03c3'), ('date',datetime.date(2001,1,2)) ]: + for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]: df = tm.makeDataFrame() df[n] = f - self.assertRaises(NotImplementedError, self.store.append, 'df1_%s' % n, df) + self.assertRaises( + NotImplementedError, self.store.append, 'df1_%s' % n, df) # frame df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' - df['datetime1'] = datetime.date(2001,1,2) + df['datetime1'] = datetime.date(2001, 1, 2) df = df.consolidate().convert_objects() # this fails because we have a date in the object block...... @@ -855,7 +890,7 @@ def test_remove(self): def test_remove_where(self): # non-existance - crit1 = Term('index','>','foo') + crit1 = Term('index', '>', 'foo') self.store.remove('a', where=[crit1]) # try to remove non-table (with crit) @@ -864,8 +899,8 @@ def test_remove_where(self): self.store.put('wp', wp, table=True) self.store.remove('wp', [('minor_axis', ['A', 'D'])]) rs = self.store.select('wp') - expected = wp.reindex(minor_axis = ['B','C']) - tm.assert_panel_equal(rs,expected) + expected = wp.reindex(minor_axis=['B', 'C']) + tm.assert_panel_equal(rs, expected) # empty where self.store.remove('wp') @@ -882,30 +917,29 @@ def test_remove_where(self): 'wp', ['foo']) # selectin non-table with a where - #self.store.put('wp2', wp, table=False) - #self.assertRaises(Exception, self.store.remove, + # self.store.put('wp2', wp, table=False) + # self.assertRaises(Exception, self.store.remove, # 'wp2', [('column', ['A', 'D'])]) - def test_remove_crit(self): wp = tm.makePanel() # group row removal - date4 = wp.major_axis.take([ 0,1,2,4,5,6,8,9,10 ]) - crit4 = Term('major_axis',date4) + date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) + crit4 = Term('major_axis', date4) self.store.put('wp3', wp, table=True) n = self.store.remove('wp3', where=[crit4]) assert(n == 36) result = self.store.select('wp3') - expected = wp.reindex(major_axis = wp.major_axis-date4) + expected = wp.reindex(major_axis=wp.major_axis - date4) tm.assert_panel_equal(result, expected) # upper half self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] - crit1 = Term('major_axis','>',date) - crit2 = Term('minor_axis',['A', 'D']) + crit1 = Term('major_axis', '>', date) + crit2 = Term('minor_axis', ['A', 'D']) n = self.store.remove('wp', where=[crit1]) assert(n == 56) @@ -921,32 +955,35 @@ def test_remove_crit(self): self.store.put('wp2', wp, table=True) date1 = wp.major_axis[1:3] - crit1 = Term('major_axis',date1) + crit1 = Term('major_axis', date1) self.store.remove('wp2', where=[crit1]) result = self.store.select('wp2') - expected = wp.reindex(major_axis=wp.major_axis-date1) + expected = wp.reindex(major_axis=wp.major_axis - date1) tm.assert_panel_equal(result, expected) date2 = wp.major_axis[5] - crit2 = Term('major_axis',date2) + crit2 = Term('major_axis', date2) self.store.remove('wp2', where=[crit2]) result = self.store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis-date1-Index([date2])) + expected = wp.reindex( + major_axis=wp.major_axis - date1 - Index([date2])) tm.assert_panel_equal(result, expected) - date3 = [wp.major_axis[7],wp.major_axis[9]] - crit3 = Term('major_axis',date3) + date3 = [wp.major_axis[7], wp.major_axis[9]] + crit3 = Term('major_axis', date3) self.store.remove('wp2', where=[crit3]) 
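test_remove_crit above is the canonical use of Term for deletion: remove() evaluates the criteria against an indexed table and returns how many rows it dropped. A sketch, assuming Term imports from pandas.io.pytables as in this test module:

```python
import pandas as pd
import pandas.util.testing as tm
from pandas.io.pytables import Term

wp = tm.makePanel()
store = pd.HDFStore('example.h5', mode='w')   # hypothetical path
store.put('wp', wp, table=True)

date = wp.major_axis[len(wp.major_axis) // 2]
n = store.remove('wp', where=[Term('major_axis', '>', date)])  # rows deleted
store.close()
```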
result = self.store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis-date1-Index([date2])-Index(date3)) + expected = wp.reindex( + major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3)) tm.assert_panel_equal(result, expected) # corners self.store.put('wp4', wp, table=True) - n = self.store.remove('wp4', where=[Term('major_axis','>',wp.major_axis[-1])]) + n = self.store.remove( + 'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])]) result = self.store.select('wp4') tm.assert_panel_equal(result, wp) - + def test_terms(self): wp = tm.makePanel() @@ -956,10 +993,10 @@ def test_terms(self): # some invalid terms terms = [ - [ 'minor', ['A','B'] ], - [ 'index', ['20121114'] ], - [ 'index', ['20121114', '20121114'] ], - ] + ['minor', ['A', 'B']], + ['index', ['20121114']], + ['index', ['20121114', '20121114']], + ] for t in terms: self.assertRaises(Exception, self.store.select, 'wp', t) @@ -970,44 +1007,48 @@ def test_terms(self): self.assertRaises(Exception, Term.__init__, 'index', '>', 5) # panel - result = self.store.select('wp',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']) ]) + result = self.store.select('wp', [Term( + 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) # p4d - result = self.store.select('p4d',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']), Term('items', '=', ['ItemA','ItemB']) ]) - expected = p4d.truncate(after='20000108').reindex(minor=['A', 'B'],items=['ItemA','ItemB']) + result = self.store.select('p4d', [Term('major_axis<20000108'), + Term('minor_axis', '=', ['A', 'B']), + Term('items', '=', ['ItemA', 'ItemB'])]) + expected = p4d.truncate(after='20000108').reindex( + minor=['A', 'B'], items=['ItemA', 'ItemB']) tm.assert_panel4d_equal(result, expected) # valid terms terms = [ - dict(field = 'major_axis', op = '>', value = '20121114'), + dict(field='major_axis', op='>', value='20121114'), ('major_axis', '20121114'), ('major_axis', '>', '20121114'), - (('major_axis', ['20121114','20121114']),), - ('major_axis', datetime.datetime(2012,11,14)), + (('major_axis', ['20121114', '20121114']),), + ('major_axis', datetime.datetime(2012, 11, 14)), 'major_axis> 20121114', 'major_axis >20121114', 'major_axis > 20121114', - (('minor_axis', ['A','B']),), - (('minor_axis', ['A','B']),), - ((('minor_axis', ['A','B']),),), - (('items', ['ItemA','ItemB']),), + (('minor_axis', ['A', 'B']),), + (('minor_axis', ['A', 'B']),), + ((('minor_axis', ['A', 'B']),),), + (('items', ['ItemA', 'ItemB']),), ('items=ItemA'), - ] + ] for t in terms: - self.store.select('wp', t) - self.store.select('p4d', t) + self.store.select('wp', t) + self.store.select('p4d', t) # valid for p4d only terms = [ - (('labels', '=', ['l1','l2']),), - Term('labels', '=', ['l1','l2']), - ] + (('labels', '=', ['l1', 'l2']),), + Term('labels', '=', ['l1', 'l2']), + ] for t in terms: - self.store.select('p4d', t) + self.store.select('p4d', t) def test_series(self): s = tm.makeStringSeries() @@ -1079,7 +1120,7 @@ def test_float_index(self): def test_tuple_index(self): # GH #492 col = np.arange(10) - idx = [(0.,1.), (2., 3.), (4., 5.)] + idx = [(0., 1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) @@ -1087,7 +1128,7 @@ def test_tuple_index(self): def test_index_types(self): values = np.random.randn(2) - func = lambda l, r : 
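The "valid terms" list in test_terms above enumerates the equivalent spellings the Term parser accepts. All of the following select the same rows (sketch, reusing the `store` and panel set up by the tests):

```python
from pandas.io.pytables import Term

# explicit constructor
store.select('wp', Term('major_axis', '>', '20121114'))
# single-string form; whitespace around the operator is insignificant
store.select('wp', 'major_axis > 20121114')
# (field, value) tuple; a list value means set membership
store.select('wp', [('minor_axis', ['A', 'B'])])
```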
tm.assert_series_equal(l, r, True, True, True) + func = lambda l, r: tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) @@ -1110,7 +1151,8 @@ def test_index_types(self): ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) - ser = Series(values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]) + ser = Series(values, [datetime.datetime( + 2012, 1, 1), datetime.datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): @@ -1137,7 +1179,7 @@ def test_frame(self): self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, - compression=True) + compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) @@ -1337,45 +1379,46 @@ def test_select(self): self.store.select('wp2') # selection on the non-indexable with a large number of columns - wp = Panel(np.random.randn(100, 100, 100), items = [ 'Item%03d' % i for i in xrange(100) ], - major_axis=date_range('1/1/2000', periods=100), minor_axis = [ 'E%03d' % i for i in xrange(100) ]) + wp = Panel( + np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)], + major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)]) self.store.remove('wp') self.store.append('wp', wp) - items = [ 'Item%03d' % i for i in xrange(80) ] + items = ['Item%03d' % i for i in xrange(80)] result = self.store.select('wp', Term('items', items)) - expected = wp.reindex(items = items) + expected = wp.reindex(items=items) tm.assert_panel_equal(expected, result) # selectin non-table with a where - #self.assertRaises(Exception, self.store.select, + # self.assertRaises(Exception, self.store.select, # 'wp2', ('column', ['A', 'D'])) # select with columns= df = tm.makeTimeDataFrame() self.store.remove('df') - self.store.append('df',df) - result = self.store.select('df', columns = ['A','B']) - expected = df.reindex(columns = ['A','B']) + self.store.append('df', df) + result = self.store.select('df', columns=['A', 'B']) + expected = df.reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) # equivalentsly - result = self.store.select('df', [ ('columns', ['A','B']) ]) - expected = df.reindex(columns = ['A','B']) + result = self.store.select('df', [('columns', ['A', 'B'])]) + expected = df.reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) # with a data column self.store.remove('df') - self.store.append('df',df, data_columns = ['A']) - result = self.store.select('df', [ 'A > 0' ], columns = ['A','B']) - expected = df[df.A > 0].reindex(columns = ['A','B']) + self.store.append('df', df, data_columns=['A']) + result = self.store.select('df', ['A > 0'], columns=['A', 'B']) + expected = df[df.A > 0].reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) # with a data column, but different columns self.store.remove('df') - self.store.append('df',df, data_columns = ['A']) - result = self.store.select('df', [ 'A > 0' ], columns = ['C','D']) - expected = df[df.A > 0].reindex(columns = ['C','D']) + self.store.append('df', df, data_columns=['A']) + result = self.store.select('df', ['A > 0'], columns=['C', 'D']) + expected = df[df.A > 0].reindex(columns=['C', 'D']) tm.assert_frame_equal(expected, result) def test_panel_select(self): @@ -1383,14 +1426,15 @@ def test_panel_select(self): self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] - crit1 = 
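test_select above covers the two filters that matter in practice: `columns=` prunes what is read back, and a where-clause on a data column prunes rows in the table itself. Sketch (file name hypothetical):

```python
import pandas as pd
import pandas.util.testing as tm

df = tm.makeTimeDataFrame()
store = pd.HDFStore('example.h5', mode='w')   # hypothetical path
store.append('df', df, data_columns=['A'])
# row filter on the data column, column pruning on the way out:
result = store.select('df', ['A > 0'], columns=['A', 'B'])
store.close()
```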
('major_axis','>=',date) + crit1 = ('major_axis', '>=', date) crit2 = ('minor_axis', '=', ['A', 'D']) result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) - result = self.store.select('wp', [ 'major_axis>=20000124', ('minor_axis', '=', ['A','B']) ]) + result = self.store.select( + 'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) @@ -1399,9 +1443,9 @@ def test_frame_select(self): self.store.put('frame', df, table=True) date = df.index[len(df) // 2] - crit1 = ('index','>=',date) - crit2 = ('columns',['A', 'D']) - crit3 = ('columns','A') + crit1 = ('index', '>=', date) + crit2 = ('columns', ['A', 'D']) + crit3 = ('columns', 'A') result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] @@ -1414,28 +1458,31 @@ def test_frame_select(self): # other indicies for a frame # integer - df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20))) + df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20))) self.store.append('df_int', df) - self.store.select('df_int', [ Term("index<10"), Term("columns", "=", ["A"]) ]) + self.store.select( + 'df_int', [Term("index<10"), Term("columns", "=", ["A"])]) - df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20), index = np.arange(20,dtype='f8'))) + df = DataFrame(dict(A=np.random.rand( + 20), B=np.random.rand(20), index=np.arange(20, dtype='f8'))) self.store.append('df_float', df) - self.store.select('df_float', [ Term("index<10.0"), Term("columns", "=", ["A"]) ]) + self.store.select( + 'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])]) # invalid terms df = tm.makeTimeDataFrame() self.store.append('df_time', df) - self.assertRaises(Exception, self.store.select, 'df_time', [ Term("index>0") ]) + self.assertRaises( + Exception, self.store.select, 'df_time', [Term("index>0")]) # can't select if not written as table - #self.store['frame'] = df - #self.assertRaises(Exception, self.store.select, + # self.store['frame'] = df + # self.assertRaises(Exception, self.store.select, # 'frame', [crit1, crit2]) def test_unique(self): df = tm.makeTimeDataFrame() - def check(x, y): self.assert_((np.unique(x) == np.unique(y)).all() == True) @@ -1443,29 +1490,30 @@ def check(x, y): self.store.append('df', df) # error - self.assertRaises(KeyError, self.store.unique, 'df','foo') + self.assertRaises(KeyError, self.store.unique, 'df', 'foo') # valid - result = self.store.unique('df','index') - check(result.values,df.index.values) + result = self.store.unique('df', 'index') + check(result.values, df.index.values) # not a data indexable column - self.assertRaises(ValueError, self.store.unique, 'df','values_block_0') + self.assertRaises( + ValueError, self.store.unique, 'df', 'values_block_0') # a data column df2 = df.copy() df2['string'] = 'foo' - self.store.append('df2',df2,data_columns = ['string']) - result = self.store.unique('df2','string') - check(result.values,df2['string'].unique()) + self.store.append('df2', df2, data_columns=['string']) + result = self.store.unique('df2', 'string') + check(result.values, df2['string'].unique()) # a data column with NaNs, result excludes the NaNs df3 = df.copy() df3['string'] = 'foo' - df3.ix[4:6,'string'] = np.nan - self.store.append('df3',df3,data_columns = ['string']) - result = self.store.unique('df3','string') - 
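test_unique here pins down the contract: unique() works only on the index and on data columns, and NaNs are dropped from the result. Sketch:

```python
import numpy as np
import pandas as pd
import pandas.util.testing as tm

df = tm.makeTimeDataFrame()
df['string'] = 'foo'
df.ix[4:6, 'string'] = np.nan

store = pd.HDFStore('example.h5', mode='w')   # hypothetical path
store.append('df', df, data_columns=['string'])
uniques = store.unique('df', 'string')   # NaNs excluded, per the test
store.close()
```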
check(result.values,df3['string'].valid().unique()) + df3.ix[4:6, 'string'] = np.nan + self.store.append('df3', df3, data_columns=['string']) + result = self.store.unique('df3', 'string') + check(result.values, df3['string'].valid().unique()) def test_coordinates(self): df = tm.makeTimeDataFrame() @@ -1480,100 +1528,113 @@ def test_coordinates(self): # get coordinates back & test vs frame self.store.remove('df') - df = DataFrame(dict(A = range(5), B = range(5))) + df = DataFrame(dict(A=range(5), B=range(5))) self.store.append('df', df) - c = self.store.select_as_coordinates('df',[ 'index<3' ]) + c = self.store.select_as_coordinates('df', ['index<3']) assert((c.values == np.arange(3)).all() == True) - result = self.store.select('df', where = c) - expected = df.ix[0:2,:] - tm.assert_frame_equal(result,expected) + result = self.store.select('df', where=c) + expected = df.ix[0:2, :] + tm.assert_frame_equal(result, expected) - c = self.store.select_as_coordinates('df', [ 'index>=3', 'index<=4' ]) - assert((c.values == np.arange(2)+3).all() == True) - result = self.store.select('df', where = c) - expected = df.ix[3:4,:] - tm.assert_frame_equal(result,expected) + c = self.store.select_as_coordinates('df', ['index>=3', 'index<=4']) + assert((c.values == np.arange(2) + 3).all() == True) + result = self.store.select('df', where=c) + expected = df.ix[3:4, :] + tm.assert_frame_equal(result, expected) # multiple tables self.store.remove('df1') self.store.remove('df2') df1 = tm.makeTimeDataFrame() - df2 = tm.makeTimeDataFrame().rename(columns = lambda x: "%s_2" % x) - self.store.append('df1',df1, data_columns = ['A','B']) - self.store.append('df2',df2) + df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) + self.store.append('df1', df1, data_columns=['A', 'B']) + self.store.append('df2', df2) - c = self.store.select_as_coordinates('df1', [ 'A>0','B>0' ]) - df1_result = self.store.select('df1',c) - df2_result = self.store.select('df2',c) - result = concat([ df1_result, df2_result ], axis=1) + c = self.store.select_as_coordinates('df1', ['A>0', 'B>0']) + df1_result = self.store.select('df1', c) + df2_result = self.store.select('df2', c) + result = concat([df1_result, df2_result], axis=1) - expected = concat([ df1, df2 ], axis=1) + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected) def test_append_to_multiple(self): df1 = tm.makeTimeDataFrame() - df2 = tm.makeTimeDataFrame().rename(columns = lambda x: "%s_2" % x) + df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) df2['foo'] = 'bar' - df = concat([ df1, df2 ], axis=1) + df = concat([df1, df2], axis=1) # exceptions - self.assertRaises(Exception, self.store.append_to_multiple, { 'df1' : ['A','B'], 'df2' : None }, df, selector = 'df3') - self.assertRaises(Exception, self.store.append_to_multiple, { 'df1' : None, 'df2' : None }, df, selector = 'df3') - self.assertRaises(Exception, self.store.append_to_multiple, 'df1', df, 'df1') + self.assertRaises(Exception, self.store.append_to_multiple, {'df1': + ['A', 'B'], 'df2': None}, df, selector='df3') + self.assertRaises(Exception, self.store.append_to_multiple, + {'df1': None, 'df2': None}, df, selector='df3') + self.assertRaises( + Exception, self.store.append_to_multiple, 'df1', df, 'df1') # regular operation - self.store.append_to_multiple({ 'df1' : ['A','B'], 'df2' : None }, df, selector = 'df1') - result = self.store.select_as_multiple(['df1','df2'], where = [ 'A>0','B>0' ], selector = 'df1') + 
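select_as_coordinates, exercised in test_coordinates above, resolves a where-clause to row positions once; those coordinates can then drive reads from any table with the same row count, which is what the multi-table tests rely on. Sketch, reusing the `store`, `df1`, and `df2` from the test:

```python
c = store.select_as_coordinates('df1', ['A>0', 'B>0'])  # row positions
part1 = store.select('df1', where=c)
part2 = store.select('df2', where=c)   # same rows from a second table
```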
self.store.append_to_multiple( + {'df1': ['A', 'B'], 'df2': None}, df, selector='df1') + result = self.store.select_as_multiple( + ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1') expected = df[(df.A > 0) & (df.B > 0)] tm.assert_frame_equal(result, expected) - def test_select_as_multiple(self): df1 = tm.makeTimeDataFrame() - df2 = tm.makeTimeDataFrame().rename(columns = lambda x: "%s_2" % x) + df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) df2['foo'] = 'bar' - self.store.append('df1',df1, data_columns = ['A','B']) - self.store.append('df2',df2) + self.store.append('df1', df1, data_columns=['A', 'B']) + self.store.append('df2', df2) # exceptions - self.assertRaises(Exception, self.store.select_as_multiple, None, where = [ 'A>0','B>0' ], selector = 'df1') - self.assertRaises(Exception, self.store.select_as_multiple, [ None ], where = [ 'A>0','B>0' ], selector = 'df1') + self.assertRaises(Exception, self.store.select_as_multiple, + None, where=['A>0', 'B>0'], selector='df1') + self.assertRaises(Exception, self.store.select_as_multiple, + [None], where=['A>0', 'B>0'], selector='df1') # default select - result = self.store.select('df1', ['A>0','B>0']) - expected = self.store.select_as_multiple([ 'df1' ], where = [ 'A>0','B>0' ], selector = 'df1') + result = self.store.select('df1', ['A>0', 'B>0']) + expected = self.store.select_as_multiple( + ['df1'], where=['A>0', 'B>0'], selector='df1') tm.assert_frame_equal(result, expected) - expected = self.store.select_as_multiple( 'df1' , where = [ 'A>0','B>0' ], selector = 'df1') + expected = self.store.select_as_multiple( + 'df1', where=['A>0', 'B>0'], selector='df1') tm.assert_frame_equal(result, expected) # multiple - result = self.store.select_as_multiple(['df1','df2'], where = [ 'A>0','B>0' ], selector = 'df1') - expected = concat([ df1, df2 ], axis=1) + result = self.store.select_as_multiple( + ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1') + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected) # multiple (diff selector) - result = self.store.select_as_multiple(['df1','df2'], where = [ Term('index', '>', df2.index[4]) ], selector = 'df2') - expected = concat([ df1, df2 ], axis=1) + result = self.store.select_as_multiple(['df1', 'df2'], where=[Term( + 'index', '>', df2.index[4])], selector='df2') + expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) # test excpection for diff rows - self.store.append('df3',tm.makeTimeDataFrame(nper=50)) - self.assertRaises(Exception, self.store.select_as_multiple, ['df1','df3'], where = [ 'A>0','B>0' ], selector = 'df1') + self.store.append('df3', tm.makeTimeDataFrame(nper=50)) + self.assertRaises(Exception, self.store.select_as_multiple, ['df1', + 'df3'], where=['A>0', 'B>0'], selector='df1') def test_start_stop(self): - - df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20))) + + df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20))) self.store.append('df', df) - result = self.store.select('df', [ Term("columns", "=", ["A"]) ], start=0, stop=5) - expected = df.ix[0:4,['A']] + result = self.store.select( + 'df', [Term("columns", "=", ["A"])], start=0, stop=5) + expected = df.ix[0:4, ['A']] tm.assert_frame_equal(result, expected) # out of range - result = self.store.select('df', [ Term("columns", "=", ["A"]) ], start=30, stop=40) + result = self.store.select( + 'df', [Term("columns", "=", ["A"])], start=30, stop=40) 
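Together, append_to_multiple and select_as_multiple shard one wide frame across several tables and query it back as a unit: the dict maps each table name to the columns it receives (None takes the remainder), and `selector` names the table whose data columns answer the where-clause. Sketch with the same shapes as the tests:

```python
store.append_to_multiple({'df1': ['A', 'B'], 'df2': None}, df,
                         selector='df1')
result = store.select_as_multiple(['df1', 'df2'],
                                  where=['A>0', 'B>0'], selector='df1')
```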
assert(len(result) == 0) assert(type(result) == DataFrame) @@ -1652,12 +1713,13 @@ def test_legacy_table_read(self): store.select('wp1') # force the frame - store.select('df2', typ = 'legacy_frame') + store.select('df2', typ='legacy_frame') # old version warning import warnings warnings.filterwarnings('ignore', category=IncompatibilityWarning) - self.assertRaises(Exception, store.select, 'wp1', Term('minor_axis','=','B')) + self.assertRaises( + Exception, store.select, 'wp1', Term('minor_axis', '=', 'B')) df2 = store.select('df2') store.select('df2', Term('index', '>', df2.index[2])) @@ -1681,10 +1743,10 @@ def test_legacy_table_write(self): wp = tm.makePanel() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a') - + self.assertRaises(Exception, store.append, 'df1', df) self.assertRaises(Exception, store.append, 'wp1', wp) - + store.close() def test_store_datetime_fractional_secs(self): @@ -1738,12 +1800,13 @@ def test_unicode_index(self): self._check_roundtrip(s, tm.assert_series_equal) def test_store_datetime_mixed(self): - df = DataFrame({'a': [1,2,3], 'b': [1.,2.,3.], 'c': ['a', 'b', 'c']}) + df = DataFrame( + {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']}) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) - #def test_cant_write_multiindex_table(self): + # def test_cant_write_multiindex_table(self): # # for now, #1848 # df = DataFrame(np.random.randn(10, 4), # index=[np.arange(5).repeat(2), @@ -1751,10 +1814,12 @@ def test_store_datetime_mixed(self): # self.assertRaises(Exception, self.store.put, 'foo', df, table=True) + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth + def _test_sort(obj): if isinstance(obj, DataFrame): return obj.reindex(sorted(obj.index)) @@ -1765,5 +1830,5 @@ def _test_sort(obj): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index a828c946686c5..b2348ec390648 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -9,8 +9,10 @@ import pandas.util.testing as tm from pandas import Series, Index, DataFrame + class TestSQLite(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.db = sqlite3.connect(':memory:') @@ -133,8 +135,6 @@ def _check_roundtrip(self, frame): expected.index = Index(range(len(frame2))) + 10 tm.assert_frame_equal(expected, result) - - def test_tquery(self): frame = tm.makeTimeDataFrame() sql.write_frame(frame, name='test_table', con=self.db) @@ -174,14 +174,14 @@ def test_uquery(self): def test_keyword_as_column_names(self): ''' ''' - df = DataFrame({'From':np.ones(5)}) - #print sql.get_sqlite_schema(df, 'testkeywords') - sql.write_frame(df, con = self.db, name = 'testkeywords') + df = DataFrame({'From': np.ones(5)}) + # print sql.get_sqlite_schema(df, 'testkeywords') + sql.write_frame(df, con=self.db, name='testkeywords') if __name__ == '__main__': # unittest.main() import nose # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'], # exit=False) - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py index 9d854e63fee0e..46eeabaf1e209 100644 --- a/pandas/io/tests/test_wb.py +++ b/pandas/io/tests/test_wb.py @@ -6,35 +6,37 
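The SQLite tests that follow use the legacy pandas.io.sql helpers. A minimal round trip, assuming the write_frame/tquery API of this vintage:

```python
import sqlite3
import pandas.io.sql as sql
import pandas.util.testing as tm

db = sqlite3.connect(':memory:')
frame = tm.makeTimeDataFrame()
sql.write_frame(frame, name='test_table', con=db)
rows = sql.tquery('SELECT count(*) FROM test_table', con=db)
```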
@@ from numpy.testing.decorators import slow from pandas.io.wb import search, download + @slow @network def test_wdi_search(): raise nose.SkipTest expected = {u'id': {2634: u'GDPPCKD', - 4649: u'NY.GDP.PCAP.KD', - 4651: u'NY.GDP.PCAP.KN', - 4653: u'NY.GDP.PCAP.PP.KD'}, - u'name': {2634: u'GDP per Capita, constant US$, millions', - 4649: u'GDP per capita (constant 2000 US$)', - 4651: u'GDP per capita (constant LCU)', - 4653: u'GDP per capita, PPP (constant 2005 international $)'}} - result = search('gdp.*capita.*constant').ix[:,:2] + 4649: u'NY.GDP.PCAP.KD', + 4651: u'NY.GDP.PCAP.KN', + 4653: u'NY.GDP.PCAP.PP.KD'}, + u'name': {2634: u'GDP per Capita, constant US$, millions', + 4649: u'GDP per capita (constant 2000 US$)', + 4651: u'GDP per capita (constant LCU)', + 4653: u'GDP per capita, PPP (constant 2005 international $)'}} + result = search('gdp.*capita.*constant').ix[:, :2] expected = pandas.DataFrame(expected) expected.index = result.index assert_frame_equal(result, expected) + @slow @network def test_wdi_download(): raise nose.SkipTest expected = {'GDPPCKN': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'37857.1261134552', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'37081.4575704003', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'72720.0691255285', (u'Mexico', u'2004'): u'74751.6003347038', (u'Mexico', u'2005'): u'76200.2154469437', (u'Canada', u'2005'): u'38617.4563629611'}, 'GDPPCKD': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'34397.055116118', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'33692.2812368928', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'7608.43848670658', (u'Mexico', u'2004'): u'7820.99026814334', (u'Mexico', u'2005'): u'7972.55364129367', (u'Canada', u'2005'): u'35087.8925933298'}} expected = pandas.DataFrame(expected) - result = download(country=['CA','MX','US', 'junk'], indicator=['GDPPCKD', - 'GDPPCKN', 'junk'], start=2003, end=2005) + result = download(country=['CA', 'MX', 'US', 'junk'], indicator=['GDPPCKD', + 'GDPPCKN', 'junk'], start=2003, end=2005) expected.index = result.index assert_frame_equal(result, pandas.DataFrame(expected)) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py index 9b15e19dd5410..89c650316468c 100644 --- a/pandas/io/tests/test_yahoo.py +++ b/pandas/io/tests/test_yahoo.py @@ -11,6 +11,7 @@ from numpy.testing.decorators import slow import urllib2 + class TestYahoo(unittest.TestCase): @slow @@ -19,8 +20,8 @@ def test_yahoo(self): # asserts that yahoo is minimally working and that it throws # an excecption when DataReader can't get a 200 response from # yahoo - start = datetime(2010,1,1) - end = datetime(2012,1,24) + start = datetime(2010, 1, 1) + end = datetime(2012, 1, 24) try: self.assertEquals( @@ -41,5 +42,5 @@ def test_yahoo(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 1270d9b0dd28f..1a2108d069589 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -4,46 +4,47 @@ import pandas import numpy as np -def download(country=['MX','CA','US'], 
indicator=['GDPPCKD','GDPPCKN'], - start=2003, end=2005): + +def download(country=['MX', 'CA', 'US'], indicator=['GDPPCKD', 'GDPPCKN'], + start=2003, end=2005): """ Download data series from the World Bank's World Development Indicators Parameters ---------- - - indicator: string or list of strings + + indicator: string or list of strings taken from the ``id`` field in ``WDIsearch()`` - country: string or list of strings. - ``all`` downloads data for all countries + country: string or list of strings. + ``all`` downloads data for all countries ISO-2 character codes select individual countries (e.g.``US``,``CA``) - start: int + start: int First year of the data series - end: int + end: int Last year of the data series (inclusive) - + Returns ------- - ``pandas`` DataFrame with columns: country, iso2c, year, indicator value. + ``pandas`` DataFrame with columns: country, iso2c, year, indicator value. """ # Are ISO-2 country codes valid? valid_countries = ["AG", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BB", - "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BO", "BR", "BS", "BW", - "BY", "BZ", "CA", "CD", "CF", "CG", "CH", "CI", "CL", "CM", "CN", - "CO", "CR", "CV", "CY", "CZ", "DE", "DK", "DM", "DO", "DZ", "EC", - "EE", "EG", "ER", "ES", "ET", "FI", "FJ", "FR", "GA", "GB", "GE", - "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", - "HT", "HU", "ID", "IE", "IL", "IN", "IR", "IS", "IT", "JM", "JO", - "JP", "KE", "KG", "KH", "KM", "KR", "KW", "KZ", "LA", "LB", "LC", - "LK", "LS", "LT", "LU", "LV", "MA", "MD", "MG", "MK", "ML", "MN", - "MR", "MU", "MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", - "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PT", - "PY", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG", "SI", - "SK", "SL", "SN", "SR", "SV", "SY", "SZ", "TD", "TG", "TH", "TN", - "TR", "TT", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", - "VN", "VU", "YE", "ZA", "ZM", "ZW", "all"] + "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BO", "BR", "BS", "BW", + "BY", "BZ", "CA", "CD", "CF", "CG", "CH", "CI", "CL", "CM", "CN", + "CO", "CR", "CV", "CY", "CZ", "DE", "DK", "DM", "DO", "DZ", "EC", + "EE", "EG", "ER", "ES", "ET", "FI", "FJ", "FR", "GA", "GB", "GE", + "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", + "HT", "HU", "ID", "IE", "IL", "IN", "IR", "IS", "IT", "JM", "JO", + "JP", "KE", "KG", "KH", "KM", "KR", "KW", "KZ", "LA", "LB", "LC", + "LK", "LS", "LT", "LU", "LV", "MA", "MD", "MG", "MK", "ML", "MN", + "MR", "MU", "MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", + "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PT", + "PY", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG", "SI", + "SK", "SL", "SN", "SR", "SV", "SY", "SZ", "TD", "TG", "TH", "TN", + "TR", "TT", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", + "VN", "VU", "YE", "ZA", "ZM", "ZW", "all"] if type(country) == str: country = [country] bad_countries = np.setdiff1d(country, valid_countries) @@ -51,17 +52,17 @@ def download(country=['MX','CA','US'], indicator=['GDPPCKD','GDPPCKN'], country = ';'.join(country) # Work with a list of indicators if type(indicator) == str: - indicator = [indicator] + indicator = [indicator] # Download - data = [] + data = [] bad_indicators = [] for ind in indicator: - try: + try: tmp = _get_data(ind, country, start, end) tmp.columns = ['country', 'iso2c', 'year', ind] data.append(tmp) except: - bad_indicators.append(ind) + bad_indicators.append(ind) # Warn if len(bad_indicators) > 0: print 'Failed to obtain indicator(s): ' + '; 
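A usage sketch for download() above (this hits the World Bank API over the network; invalid country or indicator codes are reported via the warning branches rather than raising). search() from the same module supplies indicator ids:

```python
from pandas.io import wb

hits = wb.search('gdp.*capita.*constant')   # ids feed the indicator argument
gdp = wb.download(country=['CA', 'MX', 'US'],
                  indicator=['GDPPCKD', 'GDPPCKN'],
                  start=2003, end=2005)
# -> DataFrame indexed by (country, year), one column per indicator
```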
'.join(bad_indicators) @@ -70,15 +71,15 @@ def download(country=['MX','CA','US'], indicator=['GDPPCKD','GDPPCKN'], print 'Invalid ISO-2 codes: ' + ' '.join(bad_countries) # Merge WDI series if len(data) > 0: - out = reduce(lambda x,y: x.merge(y, how='outer'), data) + out = reduce(lambda x, y: x.merge(y, how='outer'), data) # Clean out = out.drop('iso2c', axis=1) out = out.set_index(['country', 'year']) return out -def _get_data(indicator = "NY.GNS.ICTR.GN.ZS", country = 'US', - start = 2002, end = 2005): +def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', + start=2002, end=2005): # Build URL for api call url = "http://api.worldbank.org/countries/" + country + "/indicators/" + \ indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \ @@ -95,7 +96,7 @@ def _get_data(indicator = "NY.GNS.ICTR.GN.ZS", country = 'US', # Prepare output out = pandas.DataFrame([country, iso2c, year, value]).T return out - + def get_countries(): '''Query information about countries @@ -109,7 +110,7 @@ def get_countries(): data.incomeLevel = map(lambda x: x['value'], data.incomeLevel) data.lendingType = map(lambda x: x['value'], data.lendingType) data.region = map(lambda x: x['value'], data.region) - data = data.rename(columns={'id':'iso3c', 'iso2Code':'iso2c'}) + data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'}) return data @@ -126,8 +127,9 @@ def get_indicators(): fun = lambda x: x.encode('ascii', 'ignore') data.sourceOrganization = data.sourceOrganization.apply(fun) # Clean topic field + def get_value(x): - try: + try: return x['value'] except: return '' @@ -141,6 +143,8 @@ def get_value(x): _cached_series = None + + def search(string='gdp.*capi', field='name', case=False): """ Search available data series from the world bank @@ -150,7 +154,7 @@ def search(string='gdp.*capi', field='name', case=False): string: string regular expression - field: string + field: string id, name, source, sourceNote, sourceOrganization, topics See notes below case: bool @@ -158,20 +162,20 @@ def search(string='gdp.*capi', field='name', case=False): Notes ----- - + The first time this function is run it will download and cache the full list of available series. Depending on the speed of your network connection, this can take time. Subsequent searches will use the cached - copy, so they should be much faster. + copy, so they should be much faster. id : Data series indicator (for use with the ``indicator`` argument of ``WDI()``) e.g. 
NY.GNS.ICTR.GN.ZS" - name: Short description of the data series - source: Data collection project - sourceOrganization: Data collection organization - note: - sourceNote: - topics: + name: Short description of the data series + source: Data collection project + sourceOrganization: Data collection organization + note: + sourceNote: + topics: """ # Create cached list of series if it does not exist global _cached_series diff --git a/pandas/rpy/base.py b/pandas/rpy/base.py index 0c80448684697..4cd86d3c3f4e3 100644 --- a/pandas/rpy/base.py +++ b/pandas/rpy/base.py @@ -10,4 +10,3 @@ class lm(object): """ def __init__(self, formula, data): pass - diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index d2a9eaefffd1b..acc562925c925 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -219,10 +219,11 @@ def convert_to_r_posixct(obj): vals = robj.vectors.FloatSexpVector(obj.values.view('i8') / 1E9) as_posixct = robj.baseenv.get('as.POSIXct') origin = StrSexpVector([time.strftime("%Y-%m-%d", - time.gmtime(0)),]) + time.gmtime(0)), ]) # We will be sending ints as UTC - tz = obj.tz.zone if hasattr(obj, 'tz') and hasattr(obj.tz, 'zone') else 'UTC' + tz = obj.tz.zone if hasattr( + obj, 'tz') and hasattr(obj.tz, 'zone') else 'UTC' tz = StrSexpVector([tz]) utc_tz = StrSexpVector(['UTC']) @@ -232,14 +233,14 @@ def convert_to_r_posixct(obj): VECTOR_TYPES = {np.float64: robj.FloatVector, - np.float32: robj.FloatVector, - np.float: robj.FloatVector, - np.int: robj.IntVector, - np.int32: robj.IntVector, - np.int64: robj.IntVector, - np.object_: robj.StrVector, - np.str: robj.StrVector, - np.bool: robj.BoolVector} + np.float32: robj.FloatVector, + np.float: robj.FloatVector, + np.int: robj.IntVector, + np.int32: robj.IntVector, + np.int64: robj.IntVector, + np.object_: robj.StrVector, + np.str: robj.StrVector, + np.bool: robj.BoolVector} NA_TYPES = {np.float64: robj.NA_Real, np.float32: robj.NA_Real, @@ -271,7 +272,7 @@ def convert_to_r_dataframe(df, strings_as_factors=False): columns = rlc.OrdDict() - #FIXME: This doesn't handle MultiIndex + # FIXME: This doesn't handle MultiIndex for column in df: value = df[column] @@ -379,7 +380,7 @@ def test_convert_r_dataframe(): seriesd = _test.getSeriesData() frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A']) - #Null data + # Null data frame["E"] = [np.nan for item in frame["A"]] # Some mixed type data frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)] @@ -411,7 +412,7 @@ def test_convert_r_matrix(): seriesd = _test.getSeriesData() frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A']) - #Null data + # Null data frame["E"] = [np.nan for item in frame["A"]] r_dataframe = convert_to_r_matrix(frame) @@ -429,7 +430,7 @@ def test_convert_r_matrix(): # Pandas bug 1282 frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)] - #FIXME: Ugly, this whole module needs to be ported to nose/unittest + # FIXME: Ugly, this whole module needs to be ported to nose/unittest try: wrong_matrix = convert_to_r_matrix(frame) except TypeError: diff --git a/pandas/sandbox/qtpandas.py b/pandas/sandbox/qtpandas.py index 50d183dbed5fd..35aa28fea1678 100644 --- a/pandas/sandbox/qtpandas.py +++ b/pandas/sandbox/qtpandas.py @@ -3,20 +3,21 @@ @author: Jev Kuznetsov ''' -from PyQt4.QtCore import (QAbstractTableModel,Qt,QVariant,QModelIndex, SIGNAL) -from PyQt4.QtGui import (QApplication,QDialog,QVBoxLayout, QTableView, QWidget) +from PyQt4.QtCore import ( + QAbstractTableModel, Qt, QVariant, QModelIndex, SIGNAL) +from PyQt4.QtGui 
import ( + QApplication, QDialog, QVBoxLayout, QTableView, QWidget) from pandas import DataFrame, Index - class DataFrameModel(QAbstractTableModel): ''' data model for a DataFrame class ''' def __init__(self): - super(DataFrameModel,self).__init__() + super(DataFrameModel, self).__init__() self.df = DataFrame() - def setDataFrame(self,dataFrame): + def setDataFrame(self, dataFrame): self.df = dataFrame def signalUpdate(self): @@ -25,7 +26,7 @@ def signalUpdate(self): self.layoutChanged.emit() #------------- table display functions ----------------- - def headerData(self,section,orientation,role=Qt.DisplayRole): + def headerData(self, section, orientation, role=Qt.DisplayRole): if role != Qt.DisplayRole: return QVariant() @@ -36,7 +37,7 @@ def headerData(self,section,orientation,role=Qt.DisplayRole): return QVariant() elif orientation == Qt.Vertical: try: - #return self.df.index.tolist() + # return self.df.index.tolist() return self.df.index.tolist()[section] except (IndexError, ): return QVariant() @@ -48,7 +49,7 @@ def data(self, index, role=Qt.DisplayRole): if not index.isValid(): return QVariant() - return QVariant(str(self.df.ix[index.row(),index.column()])) + return QVariant(str(self.df.ix[index.row(), index.column()])) def flags(self, index): flags = super(DataFrameModel, self).flags(index) @@ -59,7 +60,7 @@ def setData(self, index, value, role): self.df.set_value(self.df.index[index.row()], self.df.columns[index.column()], value.toPyObject()) - return True + return True def rowCount(self, index=QModelIndex()): return self.df.shape[0] @@ -70,8 +71,8 @@ def columnCount(self, index=QModelIndex()): class DataFrameWidget(QWidget): ''' a simple widget for using DataFrames in a gui ''' - def __init__(self,dataFrame, parent=None): - super(DataFrameWidget,self).__init__(parent) + def __init__(self, dataFrame, parent=None): + super(DataFrameWidget, self).__init__(parent) self.dataModel = DataFrameModel() self.dataModel.setDataFrame(dataFrame) @@ -84,26 +85,25 @@ def __init__(self,dataFrame, parent=None): layout.addWidget(self.dataTable) self.setLayout(layout) - - def resizeColumnsToContents(self): self.dataTable.resizeColumnsToContents() #-----------------stand alone test code + def testDf(): ''' creates test dataframe ''' - data = {'int':[1,2,3], 'float':[1.5,2.5,3.5], - 'string':['a','b','c'], 'nan':[np.nan,np.nan,np.nan]} - return DataFrame(data, index=Index(['AAA','BBB','CCC']), - columns=['int','float','string','nan']) + data = {'int': [1, 2, 3], 'float': [1.5, 2.5, 3.5], + 'string': ['a', 'b', 'c'], 'nan': [np.nan, np.nan, np.nan]} + return DataFrame(data, index=Index(['AAA', 'BBB', 'CCC']), + columns=['int', 'float', 'string', 'nan']) class Form(QDialog): - def __init__(self,parent=None): - super(Form,self).__init__(parent) + def __init__(self, parent=None): + super(Form, self).__init__(parent) - df = testDf() # make up some data + df = testDf() # make up some data widget = DataFrameWidget(df) widget.resizeColumnsToContents() @@ -111,7 +111,7 @@ def __init__(self,parent=None): layout.addWidget(widget) self.setLayout(layout) -if __name__=='__main__': +if __name__ == '__main__': import sys import numpy as np @@ -119,9 +119,3 @@ def __init__(self,parent=None): form = Form() form.show() app.exec_() - - - - - - diff --git a/pandas/sandbox/stats/rls.py b/pandas/sandbox/stats/rls.py index b873225ccc715..51166500c484f 100644 --- a/pandas/sandbox/stats/rls.py +++ b/pandas/sandbox/stats/rls.py @@ -3,6 +3,7 @@ import numpy as np from scikits.statsmodels.regression import WLS, GLS, 
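Wiring the Qt pieces above together takes a handful of lines: DataFrameModel adapts a DataFrame to Qt's model/view protocol, and DataFrameWidget hosts the QTableView. Sketch, reusing testDf() and the classes defined in this module:

```python
from PyQt4.QtGui import QApplication

app = QApplication([])
widget = DataFrameWidget(testDf())   # model + view around a sample frame
widget.resizeColumnsToContents()
widget.show()
app.exec_()
```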
RegressionResults + class RLS(GLS): """ Restricted general least squares model that handles linear constraints @@ -57,10 +58,12 @@ def __init__(self, endog, exog, constr, param=0., sigma=None): self.cholsigmainv = np.diag(np.sqrt(sigma)) else: self.sigma = sigma - self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T + self.cholsigmainv = np.linalg.cholesky( + np.linalg.pinv(self.sigma)).T super(GLS, self).__init__(endog, exog) _rwexog = None + @property def rwexog(self): """Whitened exogenous variables augmented with restrictions""" @@ -68,15 +71,16 @@ def rwexog(self): P = self.ncoeffs K = self.nconstraint design = np.zeros((P + K, P + K)) - design[:P, :P] = np.dot(self.wexog.T, self.wexog) #top left + design[:P, :P] = np.dot(self.wexog.T, self.wexog) # top left constr = np.reshape(self.constraint, (K, P)) - design[:P, P:] = constr.T #top right partition - design[P:, :P] = constr #bottom left partition - design[P:, P:] = np.zeros((K, K)) #bottom right partition + design[:P, P:] = constr.T # top right partition + design[P:, :P] = constr # bottom left partition + design[P:, P:] = np.zeros((K, K)) # bottom right partition self._rwexog = design return self._rwexog _inv_rwexog = None + @property def inv_rwexog(self): """Inverse of self.rwexog""" @@ -85,6 +89,7 @@ def inv_rwexog(self): return self._inv_rwexog _rwendog = None + @property def rwendog(self): """Whitened endogenous variable augmented with restriction parameters""" @@ -98,6 +103,7 @@ def rwendog(self): return self._rwendog _ncp = None + @property def rnorm_cov_params(self): """Parameter covariance under restrictions""" @@ -107,6 +113,7 @@ def rnorm_cov_params(self): return self._ncp _wncp = None + @property def wrnorm_cov_params(self): """ @@ -123,6 +130,7 @@ def wrnorm_cov_params(self): return self._wncp _coeffs = None + @property def coeffs(self): """Estimated parameters""" diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 5a612498967e4..7d1327955caa4 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -119,7 +119,7 @@ def __new__(cls, data, index=None, sparse_index=None, kind='block', length = len(index) if data == fill_value or (isnull(data) - and isnull(fill_value)): + and isnull(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: @@ -160,11 +160,11 @@ def _make_time_series(self): self.__class__ = SparseTimeSeries @classmethod - def from_array(cls, arr, index=None, name=None, copy=False,fill_value=None): + def from_array(cls, arr, index=None, name=None, copy=False, fill_value=None): """ Simplified alternate constructor """ - return SparseSeries(arr, index=index, name=name, copy=copy,fill_value=fill_value) + return SparseSeries(arr, index=index, name=name, copy=copy, fill_value=fill_value) def __init__(self, data, index=None, sparse_index=None, kind='block', fill_value=None, name=None, copy=False): diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 48bd492574f75..cf2cd2f687e8d 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -10,6 +10,7 @@ from pandas.sparse.api import SparseArray from pandas.util.testing import assert_almost_equal + def assert_sp_array_equal(left, right): assert_almost_equal(left.sp_values, right.sp_values) assert(left.sp_index.equals(right.sp_index)) @@ -21,6 +22,7 @@ def assert_sp_array_equal(left, right): class TestSparseArray(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 
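A hypothetical fit with the RLS model above: `constr` supplies the restriction matrix R and `param` the right-hand side, imposing R·beta = param on the GLS estimate. The names y, X, and R below are illustrative:

```python
import numpy as np

y = np.random.randn(100)
X = np.random.randn(100, 3)
R = np.array([[1.0, 1.0, 1.0]])        # coefficients constrained to sum to 1
model = RLS(y, X, constr=R, param=1.0)
beta = model.coeffs                    # restricted estimates, per the property
```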
5, nan, 6]) self.arr = SparseArray(self.arr_data) @@ -150,5 +152,5 @@ def _check_roundtrip(obj): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 2ff417537c3fa..d31f919e2e84b 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -16,37 +16,38 @@ TEST_LENGTH = 20 -plain_case = dict(xloc = [0, 7, 15], - xlen = [3, 5, 5], - yloc = [2, 9, 14], - ylen = [2, 3, 5], - intersect_loc = [2, 9, 15], - intersect_len = [1, 3, 4]) -delete_blocks = dict(xloc = [0, 5], - xlen = [4, 4], - yloc = [1], - ylen = [4], - intersect_loc = [1], - intersect_len = [3]) -split_blocks = dict(xloc = [0], - xlen = [10], - yloc = [0, 5], - ylen = [3, 7], - intersect_loc = [0, 5], - intersect_len = [3, 5]) -skip_block = dict(xloc = [10], - xlen = [5], - yloc = [0, 12], - ylen = [5, 3], - intersect_loc = [12], - intersect_len = [3]) - -no_intersect = dict(xloc = [0, 10], - xlen = [4, 6], - yloc = [5, 17], - ylen = [4, 2], - intersect_loc = [], - intersect_len = []) +plain_case = dict(xloc=[0, 7, 15], + xlen=[3, 5, 5], + yloc=[2, 9, 14], + ylen=[2, 3, 5], + intersect_loc=[2, 9, 15], + intersect_len=[1, 3, 4]) +delete_blocks = dict(xloc=[0, 5], + xlen=[4, 4], + yloc=[1], + ylen=[4], + intersect_loc=[1], + intersect_len=[3]) +split_blocks = dict(xloc=[0], + xlen=[10], + yloc=[0, 5], + ylen=[3, 7], + intersect_loc=[0, 5], + intersect_len=[3, 5]) +skip_block = dict(xloc=[10], + xlen=[5], + yloc=[0, 12], + ylen=[5, 3], + intersect_loc=[12], + intersect_len=[3]) + +no_intersect = dict(xloc=[0, 10], + xlen=[4, 6], + yloc=[5, 17], + ylen=[4, 2], + intersect_loc=[], + intersect_len=[]) + def check_cases(_check_case): def _check_case_dict(case): @@ -63,6 +64,7 @@ def _check_case_dict(case): _check_case([0], [5], [], [], [], []) _check_case([], [], [], [], [], []) + def test_index_make_union(): def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -83,18 +85,24 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ---- r: -------- """ - xloc = [0]; xlen = [5] - yloc = [5]; ylen = [4] - eloc = [0]; elen = [9] + xloc = [0] + xlen = [5] + yloc = [5] + ylen = [4] + eloc = [0] + elen = [9] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ x: ----- ----- y: ----- -- """ - xloc = [0, 10]; xlen = [5, 5] - yloc = [2, 17]; ylen = [5, 2] - eloc = [0, 10, 17]; elen = [7, 5, 2] + xloc = [0, 10] + xlen = [5, 5] + yloc = [2, 17] + ylen = [5, 2] + eloc = [0, 10, 17] + elen = [7, 5, 2] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ @@ -102,9 +110,12 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ------- r: ---------- """ - xloc = [1]; xlen = [5] - yloc = [3]; ylen = [5] - eloc = [1]; elen = [7] + xloc = [1] + xlen = [5] + yloc = [3] + ylen = [5] + eloc = [1] + elen = [7] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ @@ -112,9 +123,12 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ------- r: ------------- """ - xloc = [2, 10]; xlen = [4, 4] - yloc = [4]; ylen = [8] - eloc = [2]; elen = [12] + xloc = [2, 10] + xlen = [4, 4] + yloc = [4] + ylen = [8] + eloc = [2] + elen = [12] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ @@ -122,9 +136,12 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ------- r: ------------- """ - xloc = [0, 5]; xlen = [3, 5] - yloc = [0]; ylen = 
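The ASCII diagrams in the union cases below read directly as constructor arguments: BlockIndex(length, block_locations, block_lengths). The first case, for instance (sketch; the extension module's import path varies across pandas versions):

```python
from pandas._sparse import BlockIndex   # import path is version-dependent

x = BlockIndex(20, [0], [5])   # x: -----
y = BlockIndex(20, [5], [4])   # y:      ----
r = x.make_union(y)            # r: --------- (one block: loc 0, length 9)
```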
[7] - eloc = [0]; elen = [10] + xloc = [0, 5] + xlen = [3, 5] + yloc = [0] + ylen = [7] + eloc = [0] + elen = [10] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ @@ -132,9 +149,12 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ------- --- r: ------------- """ - xloc = [2, 10]; xlen = [4, 4] - yloc = [4, 13]; ylen = [8, 4] - eloc = [2]; elen = [15] + xloc = [2, 10] + xlen = [4, 4] + yloc = [4, 13] + ylen = [8, 4] + eloc = [2] + elen = [15] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ @@ -142,22 +162,29 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): y: ---- ---- --- r: ---------------------- """ - xloc = [2]; xlen = [15] - yloc = [4, 9, 14]; ylen = [3, 2, 2] - eloc = [2]; elen = [15] + xloc = [2] + xlen = [15] + yloc = [4, 9, 14] + ylen = [3, 2, 2] + eloc = [2] + elen = [15] _check_case(xloc, xlen, yloc, ylen, eloc, elen) """ x: ---- --- y: --- --- """ - xloc = [0, 10]; xlen = [3, 3] - yloc = [5, 15]; ylen = [2, 2] - eloc = [0, 5, 10, 15]; elen = [3, 2, 3, 2] + xloc = [0, 10] + xlen = [3, 3] + yloc = [5, 15] + ylen = [2, 2] + eloc = [0, 5, 10, 15] + elen = [3, 2, 3, 2] _check_case(xloc, xlen, yloc, ylen, eloc, elen) # TODO: different-length index objects + def test_lookup(): def _check(index): @@ -180,6 +207,7 @@ def _check(index): # corner cases + def test_intersect(): def _check_correct(a, b, expected): result = a.intersect(b) @@ -205,6 +233,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): check_cases(_check_case) + class TestBlockIndex(TestCase): def test_equals(self): @@ -243,6 +272,7 @@ def test_to_block_index(self): index = BlockIndex(10, [0, 5], [4, 5]) self.assert_(index.to_block_index() is index) + class TestIntIndex(TestCase): def test_equals(self): @@ -267,6 +297,7 @@ def test_to_int_index(self): index = IntIndex(10, [2, 3, 4, 5, 6]) self.assert_(index.to_int_index() is index) + class TestSparseOperators(TestCase): def _nan_op_tests(self, sparse_op, python_op): @@ -309,7 +340,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xfill = 0 yfill = 2 - result_block_vals, rb_index = sparse_op(x, xindex, xfill, y, yindex, yfill) + result_block_vals, rb_index = sparse_op( + x, xindex, xfill, y, yindex, yfill) result_int_vals, ri_index = sparse_op(x, xdindex, xfill, y, ydindex, yfill) @@ -334,6 +366,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): # too cute? 
oh but how I abhor code duplication check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] + + def make_nanoptestf(op): def f(self): sparse_op = getattr(splib, 'sparse_nan%s' % op) @@ -342,6 +376,7 @@ def f(self): f.__name__ = 'test_nan%s' % op return f + def make_optestf(op): def f(self): sparse_op = getattr(splib, 'sparse_%s' % op) @@ -360,6 +395,5 @@ def f(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) - diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 18a0a55c2db2a..a69385dd9a436 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -12,6 +12,7 @@ def assert_sp_list_equal(left, right): assert_sp_array_equal(left.to_array(), right.to_array()) + class TestSparseList(unittest.TestCase): _multiprocess_can_split_ = True @@ -101,5 +102,5 @@ def test_getitem(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index eeadcbea08466..d8ec567b2bca2 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -37,6 +37,7 @@ from test_array import assert_sp_array_equal + def _test_data1(): # nan-based arr = np.arange(20, dtype=float) @@ -47,6 +48,7 @@ def _test_data1(): return arr, index + def _test_data2(): # nan-based arr = np.arange(15, dtype=float) @@ -55,18 +57,21 @@ def _test_data2(): arr[-1:] = nan return arr, index + def _test_data1_zero(): # zero-based arr, index = _test_data1() arr[np.isnan(arr)] = 0 return arr, index + def _test_data2_zero(): # zero-based arr, index = _test_data2() arr[np.isnan(arr)] = 0 return arr, index + def assert_sp_series_equal(a, b): assert(a.index.equals(b.index)) assert_sp_array_equal(a, b) @@ -95,6 +100,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): for col in right: assert(col in left) + def assert_sp_panel_equal(left, right, exact_indices=True): for item, frame in left.iterkv(): assert(item in right) @@ -108,9 +114,11 @@ def assert_sp_panel_equal(left, right, exact_indices=True): for item in right: assert(item in left) + class TestSparseSeries(TestCase, test_series.CheckNameIntegration): _multiprocess_can_split_ = True + def setUp(self): arr, index = _test_data1() @@ -143,7 +151,7 @@ def setUp(self): def test_construct_DataFrame_with_sp_series(self): # it works! 
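The sparse round trips these tests exercise come down to three pieces: only non-fill values are stored (`sp_values`), their positions live in a block or integer sparse index, and to_dense() reconstructs the original. Sketch:

```python
import numpy as np
from pandas import Series

s = Series([np.nan, np.nan, 1.0, 2.0, np.nan])
ss = s.to_sparse()     # nan-based; kind='block' by default
ss.sp_values           # -> array([ 1.,  2.])
ss.to_dense()          # reconstructs the dense Series
```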
- df = DataFrame({'col' : self.bseries}) + df = DataFrame({'col': self.bseries}) def test_sparse_to_dense(self): arr, index = _test_data1() @@ -558,6 +566,7 @@ def _compare_with_dense(obj, op): self.assertEquals(sparse_result, dense_result) to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew'] + def _compare_all(obj): for op in to_compare: _compare_with_dense(obj, op) @@ -608,19 +617,19 @@ def _check_matches(indices, expected): assert(v.sp_index.equals(expected)) indices1 = [BlockIndex(10, [2], [7]), - BlockIndex(10, [1, 6], [3, 4]), - BlockIndex(10, [0], [10])] + BlockIndex(10, [1, 6], [3, 4]), + BlockIndex(10, [0], [10])] expected1 = BlockIndex(10, [2, 6], [2, 3]) _check_matches(indices1, expected1) indices2 = [BlockIndex(10, [2], [7]), - BlockIndex(10, [2], [7])] + BlockIndex(10, [2], [7])] expected2 = indices2[0] _check_matches(indices2, expected2) # must have NaN fill value - data = {'a' : SparseSeries(np.arange(7), sparse_index=expected2, - fill_value=0)} + data = {'a': SparseSeries(np.arange(7), sparse_index=expected2, + fill_value=0)} nose.tools.assert_raises(Exception, spf.homogenize, data) def test_fill_value_corner(self): @@ -681,17 +690,20 @@ def test_combine_first(self): assert_sp_series_equal(result, result2) assert_sp_series_equal(result, expected) + class TestSparseTimeSeries(TestCase): pass + class TestSparseDataFrame(TestCase, test_frame.SafeForSparse): klass = SparseDataFrame _multiprocess_can_split_ = True + def setUp(self): - self.data = {'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B' : [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C' : np.arange(10), - 'D' : [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C': np.arange(10), + 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} self.dates = bdate_range('1/1/2011', periods=10) @@ -821,8 +833,8 @@ def _test_roundtrip(frame): self._check_all(_test_roundtrip) def test_dense_to_sparse(self): - df = DataFrame({'A' : [nan, nan, nan, 1, 2], - 'B' : [1, 2, nan, nan, nan]}) + df = DataFrame({'A': [nan, nan, nan, 1, 2], + 'B': [1, 2, nan, nan, nan]}) sdf = df.to_sparse() self.assert_(isinstance(sdf, SparseDataFrame)) self.assert_(np.isnan(sdf.default_fill_value)) @@ -832,8 +844,8 @@ def test_dense_to_sparse(self): sdf = df.to_sparse(kind='integer') self.assert_(isinstance(sdf['A'].sp_index, IntIndex)) - df = DataFrame({'A' : [0, 0, 0, 1, 2], - 'B' : [1, 2, 0, 0, 0]}, dtype=float) + df = DataFrame({'A': [0, 0, 0, 1, 2], + 'B': [1, 2, 0, 0, 0]}, dtype=float) sdf = df.to_sparse(fill_value=0) self.assertEquals(sdf.default_fill_value, 0) tm.assert_frame_equal(sdf.to_dense(), df) @@ -957,7 +969,7 @@ def test_scalar_ops(self): def test_getitem(self): # #1585 select multiple columns - sdf = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b','c']) + sdf = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b', 'c']) result = sdf[['a', 'b']] exp = sdf.reindex(columns=['a', 'b']) @@ -972,7 +984,7 @@ def test_icol(self): assert_sp_series_equal(result, self.frame['A']) # preserve sparse index type. 
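test_dense_to_sparse above also covers zero-based sparsity: passing fill_value=0 makes zeros, rather than NaN, the implicit entries. Sketch:

```python
import pandas as pd

df = pd.DataFrame({'A': [0, 0, 0, 1, 2],
                   'B': [1, 2, 0, 0, 0]}, dtype=float)
sdf = df.to_sparse(fill_value=0)
sdf.default_fill_value    # -> 0
sdf.to_dense()            # round-trips to the original frame
```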
#2251 - data = {'A' : [0,1 ]} + data = {'A': [0, 1]} iframe = SparseDataFrame(data, default_kind='integer') self.assertEquals(type(iframe['A'].sp_index), type(iframe.icol(0).sp_index)) @@ -1062,7 +1074,6 @@ def _check_frame(frame): frame['K'] = frame.default_fill_value self.assertEquals(len(frame['K'].sp_values), 0) - self._check_all(_check_frame) def test_setitem_corner(self): @@ -1134,17 +1145,18 @@ def test_apply(self): self.assert_(self.empty.apply(np.sqrt) is self.empty) def test_apply_nonuq(self): - df_orig = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c']) + df_orig = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) df = df_orig.to_sparse() rs = df.apply(lambda s: s[0], axis=1) xp = Series([1., 4., 7.], ['a', 'a', 'c']) assert_series_equal(rs, xp) - #df.T breaks + # df.T breaks df = df_orig.T.to_sparse() rs = df.apply(lambda s: s[0], axis=0) - #no non-unique columns supported in sparse yet - #assert_series_equal(rs, xp) + # no non-unique columns supported in sparse yet + # assert_series_equal(rs, xp) def test_applymap(self): # just test that it works @@ -1261,10 +1273,10 @@ def test_take(self): assert_sp_frame_equal(result, expected) def test_density(self): - df = SparseDataFrame({'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B' : [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C' : np.arange(10), - 'D' : [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}) + df = SparseDataFrame({'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C': np.arange(10), + 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}) self.assertEquals(df.density, 0.75) @@ -1279,7 +1291,7 @@ def test_stack_sparse_frame(self): def _check(frame): dense_frame = frame.to_dense() - wp = Panel.from_dict({'foo' : frame}) + wp = Panel.from_dict({'foo': frame}) from_dense_lp = wp.to_frame() from_sparse_lp = spf.stack_sparse_frame(frame) @@ -1287,7 +1299,6 @@ def _check(frame): self.assert_(np.array_equal(from_dense_lp.values, from_sparse_lp.values)) - _check(self.frame) _check(self.iframe) @@ -1375,15 +1386,15 @@ def test_isin(self): def test_sparse_pow_issue(self): # #2220 - df = SparseDataFrame({'A' : [1.1,3.3],'B' : [2.5,-3.9]}) + df = SparseDataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]}) # note : no error without nan - df = SparseDataFrame({'A' : [nan, 0, 1] }) + df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 result = 1 ** df - r1 = result.take([0],1)['A'] + r1 = result.take([0], 1)['A'] r2 = result['A'] self.assertEqual(len(r2.sp_values), len(r1.sp_values)) @@ -1395,58 +1406,62 @@ def _dense_series_compare(s, f): dense_result = f(s.to_dense()) assert_series_equal(result.to_dense(), dense_result) + def _dense_frame_compare(frame, f): result = f(frame) assert(isinstance(frame, SparseDataFrame)) dense_result = f(frame.to_dense()) assert_frame_equal(result.to_dense(), dense_result) + def panel_data1(): index = bdate_range('1/1/2011', periods=8) return DataFrame({ - 'A' : [nan, nan, nan, 0, 1, 2, 3, 4], - 'B' : [0, 1, 2, 3, 4, nan, nan, nan], - 'C' : [0, 1, 2, nan, nan, nan, 3, 4], - 'D' : [nan, 0, 1, nan, 2, 3, 4, nan] - }, index=index) + 'A': [nan, nan, nan, 0, 1, 2, 3, 4], + 'B': [0, 1, 2, 3, 4, nan, nan, nan], + 'C': [0, 1, 2, nan, nan, nan, 3, 4], + 'D': [nan, 0, 1, nan, 2, 3, 4, nan] + }, index=index) def panel_data2(): index = bdate_range('1/1/2011', periods=9) return DataFrame({ - 'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5], - 'B' : [0, 1, 2, 3, 4, 5, nan, nan, nan], - 'C' : [0, 1, 2, nan, nan, nan, 3, 4, 5], - 'D' : [nan, 0, 1, nan, 
2, 3, 4, 5, nan] - }, index=index) + 'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5], + 'B': [0, 1, 2, 3, 4, 5, nan, nan, nan], + 'C': [0, 1, 2, nan, nan, nan, 3, 4, 5], + 'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan] + }, index=index) def panel_data3(): index = bdate_range('1/1/2011', periods=10).shift(-2) return DataFrame({ - 'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B' : [0, 1, 2, 3, 4, 5, 6, nan, nan, nan], - 'C' : [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'D' : [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan] - }, index=index) + 'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan], + 'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan] + }, index=index) + class TestSparsePanel(TestCase, test_panel.SafeForLongAndSparse, test_panel.SafeForSparse): _multiprocess_can_split_ = True + @classmethod def assert_panel_equal(cls, x, y): assert_sp_panel_equal(x, y) def setUp(self): self.data_dict = { - 'ItemA' : panel_data1(), - 'ItemB' : panel_data2(), - 'ItemC' : panel_data3(), - 'ItemD' : panel_data1(), + 'ItemA': panel_data1(), + 'ItemB': panel_data2(), + 'ItemC': panel_data3(), + 'ItemD': panel_data1(), } self.panel = SparsePanel(self.data_dict) @@ -1628,7 +1643,7 @@ def _dense_comp(sparse): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) # nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure', diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index 2c8a3a65bd5ac..b75029c615735 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -90,7 +90,7 @@ def _results(self): def _coef_table(self): buffer = StringIO() buffer.write('%13s %13s %13s %13s %13s %13s\n' % - ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) + ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) template = '%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n' for i, name in enumerate(self._cols): @@ -178,7 +178,7 @@ def _calc_stats(self): else: begin = 0 - B = betas[max(obs_total[begin] - 1, 0) : obs_total[i]] + B = betas[max(obs_total[begin] - 1, 0): obs_total[i]] mean_beta, std_beta, t_stat = _calc_t_stat(B, self._nw_lags_beta) mean_betas.append(mean_beta) std_betas.append(std_beta) diff --git a/pandas/stats/interface.py b/pandas/stats/interface.py index ff87aa1c9af26..d93eb83820822 100644 --- a/pandas/stats/interface.py +++ b/pandas/stats/interface.py @@ -117,7 +117,7 @@ def ols(**kwargs): del kwargs[rolling_field] if panel: - if pool == False: + if pool is False: klass = NonPooledPanelOLS else: klass = PanelOLS @@ -125,7 +125,7 @@ def ols(**kwargs): klass = OLS else: if panel: - if pool == False: + if pool is False: klass = NonPooledPanelOLS else: klass = MovingPanelOLS diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index b4e367ac1598b..7f44af28314b3 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -149,7 +149,7 @@ def rolling_count(arg, window, freq=None, center=False, time_rule=None): converted = np.isfinite(values).astype(float) result = rolling_sum(converted, window, min_periods=1, - center=center) # already converted + center=center) # already converted # putmask here? 
result[np.isnan(result)] = 0 @@ -164,6 +164,7 @@ def rolling_cov(arg1, arg2, window, min_periods=None, freq=None, arg1 = _conv_timerule(arg1, freq, time_rule) arg2 = _conv_timerule(arg2, freq, time_rule) window = min(window, len(arg1), len(arg2)) + def _get_cov(X, Y): mean = lambda x: rolling_mean(x, window, min_periods) count = rolling_count(X + Y, window) @@ -179,6 +180,7 @@ def _get_cov(X, Y): rs[-offset:] = np.nan return rs + @Substitution("Moving sample correlation", _binary_arg_flex, _flex_retval) @Appender(_doc_template) def rolling_corr(arg1, arg2, window, min_periods=None, freq=None, @@ -188,8 +190,8 @@ def _get_corr(a, b): center=center, time_rule=time_rule) den = (rolling_std(a, window, min_periods, freq=freq, center=center, time_rule=time_rule) * - rolling_std(b, window, min_periods, freq=freq, - center=center, time_rule=time_rule)) + rolling_std(b, window, min_periods, freq=freq, + center=center, time_rule=time_rule)) return num / den return _flex_binary_moment(arg1, arg2, _get_corr) @@ -288,6 +290,7 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, rs = _center_window(rs, window, axis) return rs + def _center_window(rs, window, axis): offset = int((window - 1) / 2.) if isinstance(rs, (Series, DataFrame, Panel)): @@ -306,6 +309,7 @@ def _center_window(rs, window, axis): rs[tuple(na_indexer)] = np.nan return rs + def _process_data_structure(arg, kill_inf=True): if isinstance(arg, DataFrame): return_hook = lambda v: type(arg)(v, index=arg.index, @@ -355,7 +359,7 @@ def ewma(arg, com=None, span=None, min_periods=0, freq=None, time_rule=None, def _ewma(v): result = algos.ewma(v, com, int(adjust)) first_index = _first_valid_index(v) - result[first_index : first_index + min_periods] = NaN + result[first_index: first_index + min_periods] = NaN return result return_hook, values = _process_data_structure(arg) @@ -461,7 +465,7 @@ def _conv_timerule(arg, freq, time_rule): if time_rule is not None: import warnings warnings.warn("time_rule argument is deprecated, replace with freq", - FutureWarning) + FutureWarning) freq = time_rule @@ -576,6 +580,7 @@ def call_cython(arg, window, minp): return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, center=center, time_rule=time_rule) + def rolling_window(arg, window=None, win_type=None, min_periods=None, freq=None, center=False, mean=True, time_rule=None, axis=0, **kwargs): @@ -628,14 +633,14 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, raise ValueError(('Do not specify window type if using custom ' 'weights')) window = com._asarray_tuplesafe(window).astype(float) - elif com.is_integer(window): #window size + elif com.is_integer(window): # window size if win_type is None: raise ValueError('Must specify window type') try: import scipy.signal as sig except ImportError: raise ImportError('Please install scipy to generate window weight') - win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs + win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs window = sig.get_window(win_type, window).astype(float) else: raise ValueError('Invalid window %s' % str(window)) @@ -653,17 +658,19 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, rs = _center_window(rs, len(window), axis) return rs + def _validate_win_type(win_type, kwargs): # may pop from kwargs - arg_map = {'kaiser' : ['beta'], - 'gaussian' : ['std'], - 'general_gaussian' : ['power', 'width'], - 'slepian' : ['width']} + arg_map = {'kaiser': ['beta'], + 'gaussian': ['std'], + 
'general_gaussian': ['power', 'width'], + 'slepian': ['width']} if win_type in arg_map: return tuple([win_type] + _pop_args(win_type, arg_map[win_type], kwargs)) return win_type + def _pop_args(win_type, arg_names, kwargs): msg = '%s window requires %%s' % win_type all_args = [] @@ -695,17 +702,20 @@ def call_cython(arg, window, minp, **kwds): expanding_min = _expanding_func(algos.roll_min2, 'Expanding minimum') expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum') expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean') -expanding_median = _expanding_func(algos.roll_median_cython, 'Expanding median') +expanding_median = _expanding_func( + algos.roll_median_cython, 'Expanding median') expanding_std = _expanding_func(_ts_std, 'Unbiased expanding standard deviation', check_minp=_require_min_periods(2)) expanding_var = _expanding_func(algos.roll_var, 'Unbiased expanding variance', - check_minp=_require_min_periods(2)) -expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness', - check_minp=_require_min_periods(3)) -expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis', - check_minp=_require_min_periods(4)) + check_minp=_require_min_periods(2)) +expanding_skew = _expanding_func( + algos.roll_skew, 'Unbiased expanding skewness', + check_minp=_require_min_periods(3)) +expanding_kurt = _expanding_func( + algos.roll_kurt, 'Unbiased expanding kurtosis', + check_minp=_require_min_periods(4)) def expanding_count(arg, freq=None, center=False, time_rule=None): diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index d19898990022d..9ecf5c6ab715f 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -181,8 +181,8 @@ def _f_stat_raw(self): try: intercept = cols.get_loc('intercept') - R = np.concatenate((R[0 : intercept], R[intercept + 1:])) - r = np.concatenate((r[0 : intercept], r[intercept + 1:])) + R = np.concatenate((R[0: intercept], R[intercept + 1:])) + r = np.concatenate((r[0: intercept], r[intercept + 1:])) except KeyError: # no intercept pass @@ -485,7 +485,7 @@ def _coef_table(self): p_value = results['p_value'][name] line = coef_template % (name, - beta[name], std_err, t_stat, p_value, CI1, CI2) + beta[name], std_err, t_stat, p_value, CI1, CI2) buf.write(line) @@ -941,8 +941,8 @@ def get_result_simple(Fst, d): try: intercept = items.get_loc('intercept') - R = np.concatenate((R[0 : intercept], R[intercept + 1:])) - r = np.concatenate((r[0 : intercept], r[intercept + 1:])) + R = np.concatenate((R[0: intercept], R[intercept + 1:])) + r = np.concatenate((r[0: intercept], r[intercept + 1:])) except KeyError: # no intercept pass @@ -1313,6 +1313,8 @@ def _combine_rhs(rhs): # A little kludge so we can use this method for both # MovingOLS and MovingPanelOLS + + def _y_converter(y): y = y.values.squeeze() if y.ndim == 0: # pragma: no cover diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index 7dde37822c02b..3173e05ae8e9d 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -182,7 +182,7 @@ def _convert_x(self, x): cat_mapping[key] = dict(enumerate(distinct_values)) new_values = np.searchsorted(distinct_values, values) x_converted[key] = DataFrame(new_values, index=df.index, - columns=df.columns) + columns=df.columns) if len(cat_mapping) == 0: x_converted = x @@ -262,7 +262,8 @@ def _add_categorical_dummies(self, panel, cat_mappings): if dropped_dummy or not self._use_all_dummies: if effect in self._dropped_dummies: - to_exclude = mapped_name = self._dropped_dummies.get(effect) + to_exclude = mapped_name = 
self._dropped_dummies.get( + effect) if val_map: mapped_name = val_map[to_exclude] @@ -273,7 +274,8 @@ def _add_categorical_dummies(self, panel, cat_mappings): raise Exception('%s not in %s' % (to_exclude, dummies.columns)) - self.log('-- Excluding dummy for %s: %s' % (effect, to_exclude)) + self.log( + '-- Excluding dummy for %s: %s' % (effect, to_exclude)) dummies = dummies.filter(dummies.columns - [mapped_name]) dropped_dummy = True @@ -604,8 +606,8 @@ def _var_beta_raw(self): xx = xx - cum_xx[i - window] result = _var_beta_panel(y_slice, x_slice, beta[n], xx, rmse[n], - cluster_axis, self._nw_lags, - nobs[n], df[n], self._nw_overlap) + cluster_axis, self._nw_lags, + nobs[n], df[n], self._nw_overlap) results.append(result) @@ -745,7 +747,7 @@ def __init__(self, y, x, window_type='full_sample', window=None, def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, - nw_lags, nobs, df, nw_overlap): + nw_lags, nobs, df, nw_overlap): from pandas.core.frame import group_agg xx_inv = math.inv(xx) @@ -777,7 +779,7 @@ def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, xox = 0 for i in range(len(x.index.levels[0])): - xox += math.newey_west(m[i : i + 1], nw_lags, + xox += math.newey_west(m[i: i + 1], nw_lags, nobs, df, nw_overlap) return np.dot(xx_inv, np.dot(xox, xx_inv)) diff --git a/pandas/stats/tests/__init__.py b/pandas/stats/tests/__init__.py index 8b137891791fe..e69de29bb2d1d 100644 --- a/pandas/stats/tests/__init__.py +++ b/pandas/stats/tests/__init__.py @@ -1 +0,0 @@ - diff --git a/pandas/stats/tests/common.py b/pandas/stats/tests/common.py index b2060d30ecd30..2866a36bc435a 100644 --- a/pandas/stats/tests/common.py +++ b/pandas/stats/tests/common.py @@ -8,7 +8,7 @@ import numpy as np from pandas import DataFrame, bdate_range -from pandas.util.testing import assert_almost_equal # imported in other tests +from pandas.util.testing import assert_almost_equal # imported in other tests N = 100 K = 4 @@ -17,13 +17,15 @@ COLS = ['Col' + c for c in string.ascii_uppercase[:K]] + def makeDataFrame(): data = DataFrame(np.random.randn(N, K), - columns=COLS, - index=DATE_RANGE) + columns=COLS, + index=DATE_RANGE) return data + def getBasicDatasets(): A = makeDataFrame() B = makeDataFrame() @@ -31,12 +33,14 @@ def getBasicDatasets(): return A, B, C + def check_for_scipy(): try: import scipy except ImportError: raise nose.SkipTest('no scipy') + def check_for_statsmodels(): _have_statsmodels = True try: @@ -53,7 +57,6 @@ def setUp(self): check_for_scipy() check_for_statsmodels() - self.A, self.B, self.C = getBasicDatasets() self.createData1() @@ -80,14 +83,14 @@ def createData1(self): C = C[:30] self.panel_y = A - self.panel_x = {'B' : B, 'C' : C} + self.panel_x = {'B': B, 'C': C} self.series_panel_y = A.filter(['ColA']) - self.series_panel_x = {'B' : B.filter(['ColA']), - 'C' : C.filter(['ColA'])} + self.series_panel_x = {'B': B.filter(['ColA']), + 'C': C.filter(['ColA'])} self.series_y = A['ColA'] - self.series_x = {'B' : B['ColA'], - 'C' : C['ColA']} + self.series_x = {'B': B['ColA'], + 'C': C['ColA']} def createData2(self): y_data = [[1, np.NaN], @@ -98,7 +101,7 @@ def createData2(self): datetime(2000, 1, 3)] y_cols = ['A', 'B'] self.panel_y2 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) + columns=y_cols) x1_data = [[6, np.NaN], [7, 8], @@ -110,7 +113,7 @@ def createData2(self): datetime(2000, 1, 4)] x1_cols = ['A', 'B'] x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) + columns=x1_cols) x2_data = [[13, 14, np.NaN], [15, np.NaN, np.NaN], @@ -124,9 +127,9 @@ 
def createData2(self): datetime(2000, 1, 5)] x2_cols = ['C', 'A', 'B'] x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) + columns=x2_cols) - self.panel_x2 = {'x1' : x1, 'x2' : x2} + self.panel_x2 = {'x1': x1, 'x2': x2} def createData3(self): y_data = [[1, 2], @@ -135,7 +138,7 @@ def createData3(self): datetime(2000, 1, 2)] y_cols = ['A', 'B'] self.panel_y3 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) + columns=y_cols) x1_data = [['A', 'B'], ['C', 'A']] @@ -143,7 +146,7 @@ def createData3(self): datetime(2000, 1, 2)] x1_cols = ['A', 'B'] x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) + columns=x1_cols) x2_data = [['foo', 'bar'], ['baz', 'foo']] @@ -151,6 +154,6 @@ def createData3(self): datetime(2000, 1, 2)] x2_cols = ['A', 'B'] x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) + columns=x2_cols) - self.panel_x3 = {'x1' : x1, 'x2' : x2} + self.panel_x3 = {'x1': x1, 'x2': x2} diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index f2ebef5fd53e5..ef262cfaf44bb 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -4,6 +4,7 @@ import numpy as np + class TestFamaMacBeth(BaseTest): def testFamaMacBethRolling(self): # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y, @@ -24,7 +25,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds): **kwds) self._check_stuff_works(result) - index = result._index + index = result._index time = len(index) for i in xrange(time - window + 1): @@ -57,5 +58,5 @@ def _check_stuff_works(self, result): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py index 6553023f4f392..92dedb35f4512 100644 --- a/pandas/stats/tests/test_math.py +++ b/pandas/stats/tests/test_math.py @@ -25,6 +25,7 @@ except ImportError: _have_statsmodels = False + class TestMath(unittest.TestCase): _nan_locs = np.arange(20, 40) @@ -63,5 +64,5 @@ def test_inv_illformed(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 7d25fe342867b..a155b9ef060ef 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -10,7 +10,7 @@ from pandas import Series, DataFrame, bdate_range, isnull, notnull from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal - ) +) from pandas.util.py3compat import PY3 import pandas.core.datetools as datetools import pandas.stats.moments as mom @@ -18,6 +18,7 @@ N, K = 100, 10 + class TestMoments(unittest.TestCase): _multiprocess_can_split_ = True @@ -157,8 +158,8 @@ def test_cmov_window_special(self): raise nose.SkipTest win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] - kwds = [{'beta' : 1.}, {'std' : 1.}, {'power' : 2., 'width' : 2.}, - {'width' : 0.5}] + kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.}, + {'width': 0.5}] for wt, k in zip(win_types, kwds): vals = np.random.randn(10) @@ -174,28 +175,30 @@ def test_rolling_median(self): def test_rolling_min(self): self._check_moment_func(mom.rolling_min, np.min) - a = 
np.array([1,2,3,4,5]) + a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_min(a, window=100, min_periods=1) assert_almost_equal(b, np.ones(len(a))) - self.assertRaises(ValueError, mom.rolling_min, np.array([1,2,3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_min, np.array([1, + 2, 3]), window=3, min_periods=5) def test_rolling_max(self): self._check_moment_func(mom.rolling_max, np.max) - a = np.array([1,2,3,4,5]) + a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_max(a, window=100, min_periods=1) assert_almost_equal(a, b) - self.assertRaises(ValueError, mom.rolling_max, np.array([1,2,3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_max, np.array([1, + 2, 3]), window=3, min_periods=5) def test_rolling_quantile(self): qs = [.1, .5, .9] def scoreatpercentile(a, per): - values = np.sort(a,axis=0) + values = np.sort(a, axis=0) - idx = per /1. * (values.shape[0] - 1) + idx = per / 1. * (values.shape[0] - 1) return values[int(idx)] for q in qs: @@ -204,6 +207,7 @@ def f(x, window, min_periods=None, freq=None, center=False): min_periods=min_periods, freq=freq, center=center) + def alt(x): return scoreatpercentile(x, q) @@ -211,7 +215,8 @@ def alt(x): def test_rolling_apply(self): ser = Series([]) - assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x:x.mean())) + assert_series_equal( + ser, mom.rolling_apply(ser, 10, lambda x: x.mean())) def roll_mean(x, window, min_periods=None, freq=None, center=False): return mom.rolling_apply(x, window, @@ -239,13 +244,13 @@ def test_rolling_std(self): lambda x: np.std(x, ddof=0)) def test_rolling_std_1obs(self): - result = mom.rolling_std(np.array([1.,2.,3.,4.,5.]), + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1) expected = np.zeros(5) assert_almost_equal(result, expected) - result = mom.rolling_std(np.array([np.nan,np.nan,3.,4.,5.]), + result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), 3, min_periods=2) self.assert_(np.isnan(result[2])) @@ -378,7 +383,6 @@ def _check_ndarray(self, func, static_comp, window=50, result = func(arr, 50) assert_almost_equal(result[-1], static_comp(arr[10:-10])) - if has_center: if has_min_periods: result = func(arr, 20, min_periods=15, center=True) @@ -458,7 +462,6 @@ def _check_structures(self, func, static_comp, assert_series_equal(series_xp, series_rs) assert_frame_equal(frame_xp, frame_rs) - def test_legacy_time_rule_arg(self): from StringIO import StringIO # suppress deprecation warnings @@ -624,9 +627,9 @@ def test_expanding_apply(self): def expanding_mean(x, min_periods=1, freq=None): return mom.expanding_apply(x, - lambda x: x.mean(), - min_periods=min_periods, - freq=freq) + lambda x: x.mean(), + min_periods=min_periods, + freq=freq) self._check_expanding(expanding_mean, np.mean) def test_expanding_corr(self): @@ -728,5 +731,5 @@ def _check_expanding(self, func, static_comp, has_min_periods=True, if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 60ec676439db6..ebdb8e178d03b 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -33,10 +33,12 @@ except ImportError: _have_statsmodels = False + def _check_repr(obj): repr(obj) str(obj) + def _compare_ols_results(model1, model2): assert(type(model1) == type(model2)) @@ -45,12 +47,15 @@ def _compare_ols_results(model1, model2): 
else: _compare_fullsample_ols(model1, model2) + def _compare_fullsample_ols(model1, model2): assert_series_equal(model1.beta, model2.beta) + def _compare_moving_ols(model1, model2): assert_frame_equal(model1.beta, model2.beta) + class TestOLS(BaseTest): _multiprocess_can_split_ = True @@ -87,7 +92,8 @@ def testOLSWithDatasets_scotland(self): self.checkDataSet(sm.datasets.scotland.load()) # degenerate case fails on some platforms - # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all 0s + # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all + # 0s def testWLS(self): # WLS centered SS changed (fixed) in 0.5.0 @@ -105,7 +111,7 @@ def testWLS(self): self._check_wls(X, Y, weights) def _check_wls(self, x, y, weights): - result = ols(y=y, x=x, weights=1/weights) + result = ols(y=y, x=x, weights=1 / weights) combined = x.copy() combined['__y__'] = y @@ -116,7 +122,7 @@ def _check_wls(self, x, y, weights): aweights = combined.pop('__weights__').values exog = sm.add_constant(combined.values, prepend=False) - sm_result = sm.WLS(endog, exog, weights=1/aweights).fit() + sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit() assert_almost_equal(sm_result.params, result._beta_raw) assert_almost_equal(sm_result.resid, result._resid_raw) @@ -125,8 +131,8 @@ def _check_wls(self, x, y, weights): self.checkMovingOLS('expanding', x, y, weights=weights) def checkDataSet(self, dataset, start=None, end=None, skip_moving=False): - exog = dataset.exog[start : end] - endog = dataset.endog[start : end] + exog = dataset.exog[start: end] + endog = dataset.endog[start: end] x = DataFrame(exog, index=np.arange(exog.shape[0]), columns=np.arange(exog.shape[1])) y = Series(endog, index=np.arange(len(endog))) @@ -241,6 +247,7 @@ def test_ols_object_dtype(self): model = ols(y=df[0], x=df[1]) summary = repr(model) + class TestOLSMisc(unittest.TestCase): _multiprocess_can_split_ = True @@ -323,7 +330,7 @@ def test_predict(self): x3 = x2 + 10 pred3 = model1.predict(x=x3) x3['intercept'] = 1. 
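        # the reindex below aligns x3's columns (including the manually
        # added intercept) with model1.beta, so the expected value can be
        # computed as a plain dot product against the coefficients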
- x3 = x3.reindex(columns = model1.beta.index) + x3 = x3.reindex(columns=model1.beta.index) expected = Series(np.dot(x3.values, model1.beta.values), x3.index) assert_series_equal(expected, pred3) @@ -332,13 +339,13 @@ def test_predict(self): assert_series_equal(Series(0., pred4.index), pred4) def test_predict_longer_exog(self): - exogenous = {"1998": "4760","1999": "5904","2000": "4504", - "2001": "9808","2002": "4241","2003": "4086", - "2004": "4687","2005": "7686","2006": "3740", - "2007": "3075","2008": "3753","2009": "4679", - "2010": "5468","2011": "7154","2012": "4292", - "2013": "4283","2014": "4595","2015": "9194", - "2016": "4221","2017": "4520"} + exogenous = {"1998": "4760", "1999": "5904", "2000": "4504", + "2001": "9808", "2002": "4241", "2003": "4086", + "2004": "4687", "2005": "7686", "2006": "3740", + "2007": "3075", "2008": "3753", "2009": "4679", + "2010": "5468", "2011": "7154", "2012": "4292", + "2013": "4283", "2014": "4595", "2015": "9194", + "2016": "4221", "2017": "4520"} endogenous = {"1998": "691", "1999": "1580", "2000": "80", "2001": "1450", "2002": "555", "2003": "956", "2004": "877", "2005": "614", "2006": "468", @@ -365,7 +372,7 @@ def test_series_rhs(self): y = tm.makeTimeSeries() x = tm.makeTimeSeries() model = ols(y=y, x=x) - expected = ols(y=y, x={'x' : x}) + expected = ols(y=y, x={'x': x}) assert_series_equal(model.beta, expected.beta) def test_various_attributes(self): @@ -389,13 +396,13 @@ def test_catch_regressor_overlap(self): df2 = tm.makeTimeDataFrame().ix[:, ['B', 'C', 'D']] y = tm.makeTimeSeries() - data = {'foo' : df1, 'bar' : df2} + data = {'foo': df1, 'bar': df2} self.assertRaises(Exception, ols, y=y, x=data) def test_plm_ctor(self): y = tm.makeTimeDataFrame() - x = {'a' : tm.makeTimeDataFrame(), - 'b' : tm.makeTimeDataFrame()} + x = {'a': tm.makeTimeDataFrame(), + 'b': tm.makeTimeDataFrame()} model = ols(y=y, x=x, intercept=False) model.summary @@ -405,8 +412,8 @@ def test_plm_ctor(self): def test_plm_attrs(self): y = tm.makeTimeDataFrame() - x = {'a' : tm.makeTimeDataFrame(), - 'b' : tm.makeTimeDataFrame()} + x = {'a': tm.makeTimeDataFrame(), + 'b': tm.makeTimeDataFrame()} rmodel = ols(y=y, x=x, window=10) model = ols(y=y, x=x) @@ -415,16 +422,16 @@ def test_plm_attrs(self): def test_plm_lagged_y_predict(self): y = tm.makeTimeDataFrame() - x = {'a' : tm.makeTimeDataFrame(), - 'b' : tm.makeTimeDataFrame()} + x = {'a': tm.makeTimeDataFrame(), + 'b': tm.makeTimeDataFrame()} model = ols(y=y, x=x, window=10) result = model.lagged_y_predict(2) def test_plm_f_test(self): y = tm.makeTimeDataFrame() - x = {'a' : tm.makeTimeDataFrame(), - 'b' : tm.makeTimeDataFrame()} + x = {'a': tm.makeTimeDataFrame(), + 'b': tm.makeTimeDataFrame()} model = ols(y=y, x=x) @@ -438,14 +445,15 @@ def test_plm_f_test(self): def test_plm_exclude_dummy_corner(self): y = tm.makeTimeDataFrame() - x = {'a' : tm.makeTimeDataFrame(), - 'b' : tm.makeTimeDataFrame()} + x = {'a': tm.makeTimeDataFrame(), + 'b': tm.makeTimeDataFrame()} - model = ols(y=y, x=x, entity_effects=True, dropped_dummies={'entity' : 'D'}) + model = ols( + y=y, x=x, entity_effects=True, dropped_dummies={'entity': 'D'}) model.summary self.assertRaises(Exception, ols, y=y, x=x, entity_effects=True, - dropped_dummies={'entity' : 'E'}) + dropped_dummies={'entity': 'E'}) def test_columns_tuples_summary(self): # #1837 @@ -456,6 +464,7 @@ def test_columns_tuples_summary(self): model = ols(y=Y, x=X) model.summary + class TestPanelOLS(BaseTest): _multiprocess_can_split_ = True @@ -473,7 +482,8 @@ def 
testFiltering(self):
         index = x.index.get_level_values(0)
         index = Index(sorted(set(index)))
         exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
-        self.assertTrue;(exp_index.equals(index))
+        self.assertTrue(
+            exp_index.equals(index))

         index = x.index.get_level_values(1)
         index = Index(sorted(set(index)))
@@ -507,8 +517,8 @@ def testFiltering(self):

     def test_wls_panel(self):
         y = tm.makeTimeDataFrame()
-        x = Panel({'x1' : tm.makeTimeDataFrame(),
-                   'x2' : tm.makeTimeDataFrame()})
+        x = Panel({'x1': tm.makeTimeDataFrame(),
+                   'x2': tm.makeTimeDataFrame()})

         y.ix[[1, 7], 'A'] = np.nan
         y.ix[[6, 15], 'B'] = np.nan
@@ -517,7 +527,7 @@ def test_wls_panel(self):
         stack_y = y.stack()
         stack_x = DataFrame(dict((k, v.stack())
-                                 for k, v in x.iterkv()))
+                            for k, v in x.iterkv()))

         weights = x.std('items')
         stack_weights = weights.stack()
@@ -526,8 +536,8 @@ def test_wls_panel(self):
         stack_x.index = stack_x.index._tuple_index
         stack_weights.index = stack_weights.index._tuple_index

-        result = ols(y=y, x=x, weights=1/weights)
-        expected = ols(y=stack_y, x=stack_x, weights=1/stack_weights)
+        result = ols(y=y, x=x, weights=1 / weights)
+        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

         assert_almost_equal(result.beta, expected.beta)

@@ -560,7 +570,7 @@ def testWithEntityEffects(self):

     def testWithEntityEffectsAndDroppedDummies(self):
         result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
-                     dropped_dummies={'entity' : 'B'})
+                     dropped_dummies={'entity': 'B'})

         assert_almost_equal(result._y.values.flat, [1, 4, 5])
         exp_x = DataFrame([[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]],
@@ -583,7 +593,7 @@ def testWithXEffects(self):

     def testWithXEffectsAndDroppedDummies(self):
         result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'],
-                     dropped_dummies={'x1' : 30})
+                     dropped_dummies={'x1': 30})

         res = result._x
         assert_almost_equal(result._y.values.flat, [1, 4, 5])
@@ -608,7 +618,7 @@ def testWithXEffectsAndConversion(self):

     def testWithXEffectsAndConversionAndDroppedDummies(self):
         result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'],
-                     dropped_dummies={'x2' : 'foo'})
+                     dropped_dummies={'x2': 'foo'})

         assert_almost_equal(result._y.values.flat, [1, 2, 3, 4])
         exp_x = [[0, 0, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 1],
@@ -631,7 +641,6 @@ def testForSeries(self):
                        self.series_x, self.series_y,
                        nw_lags=1, nw_overlap=True)

-
     def testRolling(self):
         self.checkMovingOLS(self.panel_x, self.panel_y)

@@ -671,7 +680,8 @@ def testRollingWithNeweyWestAndTimeEffectsAndEntityCluster(self):
                            time_effects=True)

     def testExpanding(self):
-        self.checkMovingOLS(self.panel_x, self.panel_y, window_type='expanding')
+        self.checkMovingOLS(
+            self.panel_x, self.panel_y, window_type='expanding')

     def testNonPooled(self):
         self.checkNonPooled(y=self.panel_y, x=self.panel_x)
@@ -762,6 +772,7 @@ def test_auto_rolling_window_type(self):

         assert_frame_equal(window_model.beta, rolling_model.beta)

+
 def _check_non_raw_results(model):
     _check_repr(model)
     _check_repr(model.resid)
@@ -769,6 +780,7 @@ def _check_non_raw_results(model):
     _check_repr(model.y_fitted)
     _check_repr(model.y_predict)

+
 def _period_slice(panelModel, i):
     index = panelModel._x_trans.index
     period = index.levels[0][i]
@@ -777,6 +789,7 @@ def _period_slice(panelModel, i):

     return slice(L, R)

+
 class TestOLSFilter(unittest.TestCase):
     _multiprocess_can_split_ = True

@@ -802,29 +815,29 @@ def setUp(self):
         ts = Series([np.nan, 5, 8, 9, 7], index=date_index)
         self.TS4 = ts

-        data = {'x1' : self.TS2, 'x2' : self.TS4}
+        data = {'x1': self.TS2, 'x2': 
self.TS4} self.DF1 = DataFrame(data=data) - data = {'x1' : self.TS2, 'x2' : self.TS4} + data = {'x1': self.TS2, 'x2': self.TS4} self.DICT1 = data def testFilterWithSeriesRHS(self): (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, {'x1' : self.TS2}, None) + index, valid) = _filter_data(self.TS1, {'x1': self.TS2}, None) self.tsAssertEqual(self.TS1, lhs) self.tsAssertEqual(self.TS2[:3], rhs['x1']) self.tsAssertEqual(self.TS2, rhs_pre['x1']) def testFilterWithSeriesRHS2(self): (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS2, {'x1' : self.TS1}, None) + index, valid) = _filter_data(self.TS2, {'x1': self.TS1}, None) self.tsAssertEqual(self.TS2[:3], lhs) self.tsAssertEqual(self.TS1, rhs['x1']) self.tsAssertEqual(self.TS1, rhs_pre['x1']) def testFilterWithSeriesRHS3(self): (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS3, {'x1' : self.TS4}, None) + index, valid) = _filter_data(self.TS3, {'x1': self.TS4}, None) exp_lhs = self.TS3[2:3] exp_rhs = self.TS4[2:3] exp_rhs_pre = self.TS4[1:] @@ -834,7 +847,7 @@ def testFilterWithSeriesRHS3(self): def testFilterWithDataFrameRHS(self): (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DF1, None) + index, valid) = _filter_data(self.TS1, self.DF1, None) exp_lhs = self.TS1[1:] exp_rhs1 = self.TS2[1:3] exp_rhs2 = self.TS4[1:3] @@ -844,7 +857,7 @@ def testFilterWithDataFrameRHS(self): def testFilterWithDictRHS(self): (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DICT1, None) + index, valid) = _filter_data(self.TS1, self.DICT1, None) exp_lhs = self.TS1[1:] exp_rhs1 = self.TS2[1:3] exp_rhs2 = self.TS4[1:3] @@ -858,5 +871,5 @@ def tsAssertEqual(self, ts1, ts2): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index b48fc5bb0cdc4..282a794980979 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -35,6 +35,7 @@ DECIMAL_3 = 3 DECIMAL_2 = 2 + class CheckVAR(object): def test_params(self): assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_3) @@ -51,21 +52,21 @@ def test_df_eq(self): def test_rmse(self): results = self.res1.results for i in range(len(results)): - assert_almost_equal(results[i].mse_resid**.5, - eval('self.res2.rmse_'+str(i+1)), DECIMAL_6) + assert_almost_equal(results[i].mse_resid ** .5, + eval('self.res2.rmse_' + str(i + 1)), DECIMAL_6) def test_rsquared(self): results = self.res1.results for i in range(len(results)): assert_almost_equal(results[i].rsquared, - eval('self.res2.rsquared_'+str(i+1)), DECIMAL_3) + eval('self.res2.rsquared_' + str(i + 1)), DECIMAL_3) def test_llf(self): results = self.res1.results assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_2) for i in range(len(results)): assert_almost_equal(results[i].llf, - eval('self.res2.llf_'+str(i+1)), DECIMAL_2) + eval('self.res2.llf_' + str(i + 1)), DECIMAL_2) def test_aic(self): assert_almost_equal(self.res1.aic, self.res2.aic) @@ -89,8 +90,8 @@ def test_bse(self): class Foo(object): def __init__(self): data = sm.datasets.macrodata.load() - data = data.data[['realinv','realgdp','realcons']].view((float,3)) - data = diff(log(data),axis=0) + data = data.data[['realinv', 'realgdp', 'realcons']].view((float, 3)) + data = diff(log(data), axis=0) self.res1 = VAR2(endog=data).fit(maxlag=2) from results import 
results_var self.res2 = results_var.MacrodataResults() @@ -137,14 +138,15 @@ def plot(self, names=None): def serial_test(self, lags_pt=16, type='PT.asymptotic'): f = r['serial.test'] - test = f(self._estimate, **{'lags.pt' : lags_pt, - 'type' : type}) + test = f(self._estimate, **{'lags.pt': lags_pt, + 'type': type}) return test def data_summary(self): print r.summary(self.rdata) + class TestVAR(TestCase): def setUp(self): diff --git a/pandas/stats/var.py b/pandas/stats/var.py index a4eb8920a3b40..9390eef95700a 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -129,7 +129,8 @@ def granger_causality(self): for col in self._columns: d[col] = {} for i in xrange(1, 1 + self._p): - lagged_data = self._lagged_data[i].filter(self._columns - [col]) + lagged_data = self._lagged_data[i].filter( + self._columns - [col]) for key, value in lagged_data.iteritems(): d[col][_make_param_name(i, key)] = value @@ -312,9 +313,9 @@ def _data_xs(self, i): def _forecast_cov_raw(self, n): resid = self._forecast_cov_resid_raw(n) - #beta = self._forecast_cov_beta_raw(n) + # beta = self._forecast_cov_beta_raw(n) - #return [a + b for a, b in izip(resid, beta)] + # return [a + b for a, b in izip(resid, beta)] # TODO: ignore the beta forecast std err until it's verified return resid @@ -428,7 +429,7 @@ def _lag_betas(self): """ k = self._k b = self._beta_raw - return [b[k * i : k * (i + 1)].T for i in xrange(self._p)] + return [b[k * i: k * (i + 1)].T for i in xrange(self._p)] @cache_readonly def _lagged_data(self): diff --git a/pandas/tests/__init__.py b/pandas/tests/__init__.py index 8b137891791fe..e69de29bb2d1d 100644 --- a/pandas/tests/__init__.py +++ b/pandas/tests/__init__.py @@ -1 +0,0 @@ - diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3bf64e5d3d8ce..8706bb9cf7f4f 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -11,6 +11,7 @@ class TestMatch(unittest.TestCase): _multiprocess_can_split_ = True + def test_ints(self): values = np.array([0, 2, 1]) to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0]) @@ -30,6 +31,7 @@ def test_strings(self): class TestUnique(unittest.TestCase): _multiprocess_can_split_ = True + def test_ints(self): arr = np.random.randint(0, 100, size=50) @@ -44,7 +46,8 @@ def test_objects(self): def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] - for i in xrange(1000): len(algos.unique(lst)) + for i in xrange(1000): + len(algos.unique(lst)) def test_on_index_object(self): mindex = pd.MultiIndex.from_arrays([np.arange(5).repeat(5), @@ -59,6 +62,7 @@ def test_on_index_object(self): tm.assert_almost_equal(result, expected) + def test_quantile(): s = Series(np.random.randn(100)) @@ -68,5 +72,5 @@ def test_quantile(): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1562941ec474e..5b1d6c31403cb 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -17,20 +17,22 @@ _multiprocess_can_split_ = True + def test_is_sequence(): - is_seq=com._is_sequence - assert(is_seq((1,2))) - assert(is_seq([1,2])) + is_seq = com._is_sequence + assert(is_seq((1, 2))) + assert(is_seq([1, 2])) assert(not is_seq("abcd")) assert(not is_seq(u"abcd")) assert(not is_seq(np.int64)) + def test_notnull(): assert notnull(1.) 
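    # notnull is the elementwise complement of isnull: scalars yield a
    # single bool, and the mode.use_inf_as_null option exercised below
    # controls whether +/-inf is treated as null too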
assert not notnull(None) assert not notnull(np.NaN) - with cf.option_context("mode.use_inf_as_null",False): + with cf.option_context("mode.use_inf_as_null", False): assert notnull(np.inf) assert notnull(-np.inf) @@ -38,7 +40,7 @@ def test_notnull(): result = notnull(arr) assert result.all() - with cf.option_context("mode.use_inf_as_null",True): + with cf.option_context("mode.use_inf_as_null", True): assert not notnull(np.inf) assert not notnull(-np.inf) @@ -46,12 +48,13 @@ def test_notnull(): result = notnull(arr) assert result.sum() == 2 - with cf.option_context("mode.use_inf_as_null",False): + with cf.option_context("mode.use_inf_as_null", False): float_series = Series(np.random.randn(5)) obj_series = Series(np.random.randn(5), dtype=object) assert(isinstance(notnull(float_series), Series)) assert(isinstance(notnull(obj_series), Series)) + def test_isnull(): assert not isnull(1.) assert isnull(None) @@ -77,7 +80,7 @@ def test_isnull_lists(): exp = np.array([[False]]) assert(np.array_equal(result, exp)) - result = isnull([[1],[2]]) + result = isnull([[1], [2]]) exp = np.array([[False], [False]]) assert(np.array_equal(result, exp)) @@ -103,19 +106,23 @@ def test_isnull_datetime(): assert(mask[0]) assert(not mask[1:].any()) + def test_any_none(): assert(com._any_none(1, 2, 3, None)) assert(not com._any_none(1, 2, 3, 4)) + def test_all_not_none(): assert(com._all_not_none(1, 2, 3, 4)) assert(not com._all_not_none(1, 2, 3, None)) assert(not com._all_not_none(None, None, None, None)) + def test_rands(): r = com.rands(10) assert(len(r) == 10) + def test_adjoin(): data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], @@ -126,6 +133,7 @@ def test_adjoin(): assert(adjoined == expected) + def test_iterpairs(): data = [1, 2, 3, 4] expected = [(1, 2), @@ -136,17 +144,18 @@ def test_iterpairs(): assert(result == expected) + def test_split_ranges(): def _bin(x, width): "return int(x) as a base2 string of given width" - return ''.join(str((x>>i)&1) for i in xrange(width-1,-1,-1)) + return ''.join(str((x >> i) & 1) for i in xrange(width - 1, -1, -1)) def test_locs(mask): nfalse = sum(np.array(mask) == 0) - remaining=0 + remaining = 0 for s, e in com.split_ranges(mask): - remaining += e-s + remaining += e - s assert 0 not in mask[s:e] @@ -154,10 +163,10 @@ def test_locs(mask): assert remaining + nfalse == len(mask) # exhaustively test all possible mask sequences of length 8 - ncols=8 - for i in range(2**ncols): - cols=map(int,list(_bin(i,ncols))) # count up in base2 - mask=[cols[i] == 1 for i in range(len(cols))] + ncols = 8 + for i in range(2 ** ncols): + cols = map(int, list(_bin(i, ncols))) # count up in base2 + mask = [cols[i] == 1 for i in range(len(cols))] test_locs(mask) # base cases @@ -165,24 +174,28 @@ def test_locs(mask): test_locs([0]) test_locs([1]) + def test_indent(): s = 'a b c\nd e f' result = com.indent(s, spaces=6) assert(result == ' a b c\n d e f') + def test_banner(): ban = com.banner('hi') assert(ban == ('%s\nhi\n%s' % ('=' * 80, '=' * 80))) + def test_map_indices_py(): data = [4, 3, 2, 1] - expected = {4 : 0, 3 : 1, 2 : 2, 1 : 3} + expected = {4: 0, 3: 1, 2: 2, 1: 3} result = com.map_indices_py(data) assert(result == expected) + def test_union(): a = [1, 2, 3] b = [4, 5, 6] @@ -191,6 +204,7 @@ def test_union(): assert((a + b) == union) + def test_difference(): a = [1, 2, 3] b = [1, 2, 3, 4, 5, 6] @@ -199,6 +213,7 @@ def test_difference(): assert([4, 5, 6] == inter) + def test_intersection(): a = [1, 2, 3] b = [1, 2, 3, 4, 5, 6] @@ -207,17 +222,19 @@ def test_intersection(): assert(a == 
inter) + def test_groupby(): values = ['foo', 'bar', 'baz', 'baz2', 'qux', 'foo3'] - expected = {'f' : ['foo', 'foo3'], - 'b' : ['bar', 'baz', 'baz2'], - 'q' : ['qux']} + expected = {'f': ['foo', 'foo3'], + 'b': ['bar', 'baz', 'baz2'], + 'q': ['qux']} grouped = com.groupby(values, lambda x: x[0]) for k, v in grouped: assert v == expected[k] + def test_ensure_int32(): values = np.arange(10, dtype=np.int32) result = com._ensure_int32(values) @@ -242,26 +259,26 @@ def test_ensure_int32(): # expected = u"\u05d0".encode('utf-8') # assert (result == expected) + def test_pprint_thing(): if py3compat.PY3: raise nose.SkipTest - pp_t=com.pprint_thing + pp_t = com.pprint_thing - assert(pp_t('a')==u'a') - assert(pp_t(u'a')==u'a') - assert(pp_t(None)=='') - assert(pp_t(u'\u05d0')==u'\u05d0') - assert(pp_t((u'\u05d0',u'\u05d1'))==u'(\u05d0, \u05d1)') - assert(pp_t((u'\u05d0',(u'\u05d1',u'\u05d2')))== + assert(pp_t('a') == u'a') + assert(pp_t(u'a') == u'a') + assert(pp_t(None) == '') + assert(pp_t(u'\u05d0') == u'\u05d0') + assert(pp_t((u'\u05d0', u'\u05d1')) == u'(\u05d0, \u05d1)') + assert(pp_t((u'\u05d0', (u'\u05d1', u'\u05d2'))) == u'(\u05d0, (\u05d1, \u05d2))') - assert(pp_t(('foo',u'\u05d0',(u'\u05d0',u'\u05d0')))== + assert(pp_t(('foo', u'\u05d0', (u'\u05d0', u'\u05d0'))) == u'(foo, \u05d0, (\u05d0, \u05d0))') - # escape embedded tabs in string # GH #2038 - assert not "\t" in pp_t("a\tb",escape_chars=("\t",)) + assert not "\t" in pp_t("a\tb", escape_chars=("\t",)) class TestTake(unittest.TestCase): @@ -391,7 +408,7 @@ def test_2d_float32(self): # test with float64 out buffer out = np.empty((len(indexer), arr.shape[1]), dtype='f8') - com.take_2d(arr, indexer, out=out) # it works! + com.take_2d(arr, indexer, out=out) # it works! # axis=1 result = com.take_2d(arr, indexer, axis=1) @@ -404,5 +421,5 @@ def test_2d_float32(self): tm.assert_almost_equal(result, expected) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index 716402689b70f..11800cfc4e38f 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -6,31 +6,34 @@ import warnings import nose + class TestConfig(unittest.TestCase): _multiprocess_can_split_ = True - def __init__(self,*args): - super(TestConfig,self).__init__(*args) + + def __init__(self, *args): + super(TestConfig, self).__init__(*args) from copy import deepcopy self.cf = pd.core.config - self.gc=deepcopy(getattr(self.cf, '_global_config')) - self.do=deepcopy(getattr(self.cf, '_deprecated_options')) - self.ro=deepcopy(getattr(self.cf, '_registered_options')) + self.gc = deepcopy(getattr(self.cf, '_global_config')) + self.do = deepcopy(getattr(self.cf, '_deprecated_options')) + self.ro = deepcopy(getattr(self.cf, '_registered_options')) def setUp(self): setattr(self.cf, '_global_config', {}) - setattr(self.cf, 'options', self.cf.DictWrapper(self.cf._global_config)) + setattr( + self.cf, 'options', self.cf.DictWrapper(self.cf._global_config)) setattr(self.cf, '_deprecated_options', {}) setattr(self.cf, '_registered_options', {}) def tearDown(self): - setattr(self.cf, '_global_config',self.gc) + setattr(self.cf, '_global_config', self.gc) setattr(self.cf, '_deprecated_options', self.do) setattr(self.cf, '_registered_options', self.ro) def test_api(self): - #the pandas object exposes the user API + # the pandas object exposes the user API 
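        # an illustrative round trip through this API (assuming the
        # long-standing 'display.max_rows' option is registered):
        #   pd.set_option('display.max_rows', 10)
        #   assert pd.get_option('display.max_rows') == 10
        #   pd.reset_option('display.max_rows')  # restore the default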
self.assertTrue(hasattr(pd, 'get_option')) self.assertTrue(hasattr(pd, 'set_option')) self.assertTrue(hasattr(pd, 'reset_option')) @@ -49,11 +52,10 @@ def test_register_option(self): 'doc') # no python keywords - self.assertRaises(ValueError, self.cf.register_option, 'for',0) + self.assertRaises(ValueError, self.cf.register_option, 'for', 0) # must be valid identifier (ensure attribute access works) self.assertRaises(ValueError, self.cf.register_option, - 'Oh my Goddess!',0) - + 'Oh my Goddess!', 0) # we can register options several levels deep # without predefining the intermediate steps @@ -71,35 +73,44 @@ def test_describe_option(self): self.cf.register_option('c.d.e2', 1, 'doc4') self.cf.register_option('f', 1) self.cf.register_option('g.h', 1) - self.cf.deprecate_option('g.h',rkey="blah") + self.cf.deprecate_option('g.h', rkey="blah") # non-existent keys raise KeyError self.assertRaises(KeyError, self.cf.describe_option, 'no.such.key') # we can get the description for any key we registered - self.assertTrue('doc' in self.cf.describe_option('a',_print_desc=False)) - self.assertTrue('doc2' in self.cf.describe_option('b',_print_desc=False)) - self.assertTrue('precated' in self.cf.describe_option('b',_print_desc=False)) - - self.assertTrue('doc3' in self.cf.describe_option('c.d.e1',_print_desc=False)) - self.assertTrue('doc4' in self.cf.describe_option('c.d.e2',_print_desc=False)) + self.assertTrue( + 'doc' in self.cf.describe_option('a', _print_desc=False)) + self.assertTrue( + 'doc2' in self.cf.describe_option('b', _print_desc=False)) + self.assertTrue( + 'precated' in self.cf.describe_option('b', _print_desc=False)) + + self.assertTrue( + 'doc3' in self.cf.describe_option('c.d.e1', _print_desc=False)) + self.assertTrue( + 'doc4' in self.cf.describe_option('c.d.e2', _print_desc=False)) # if no doc is specified we get a default message # saying "description not available" - self.assertTrue('vailable' in self.cf.describe_option('f',_print_desc=False)) - self.assertTrue('vailable' in self.cf.describe_option('g.h',_print_desc=False)) - self.assertTrue('precated' in self.cf.describe_option('g.h',_print_desc=False)) - self.assertTrue('blah' in self.cf.describe_option('g.h',_print_desc=False)) + self.assertTrue( + 'vailable' in self.cf.describe_option('f', _print_desc=False)) + self.assertTrue( + 'vailable' in self.cf.describe_option('g.h', _print_desc=False)) + self.assertTrue( + 'precated' in self.cf.describe_option('g.h', _print_desc=False)) + self.assertTrue( + 'blah' in self.cf.describe_option('g.h', _print_desc=False)) def test_case_insensitive(self): self.cf.register_option('KanBAN', 1, 'doc') - self.assertTrue('doc' in self.cf.describe_option('kanbaN',_print_desc=False)) + self.assertTrue( + 'doc' in self.cf.describe_option('kanbaN', _print_desc=False)) self.assertEqual(self.cf.get_option('kanBaN'), 1) - self.cf.set_option('KanBan',2) + self.cf.set_option('KanBan', 2) self.assertEqual(self.cf.get_option('kAnBaN'), 2) - # gets of non-existent keys fail self.assertRaises(KeyError, self.cf.get_option, 'no_such_option') self.cf.deprecate_option('KanBan') @@ -149,7 +160,8 @@ def test_validation(self): self.cf.set_option('a', 2) # int is_int self.cf.set_option('b.c', 'wurld') # str is_str - self.assertRaises(ValueError, self.cf.set_option, 'a', None) # None not is_int + self.assertRaises( + ValueError, self.cf.set_option, 'a', None) # None not is_int self.assertRaises(ValueError, self.cf.set_option, 'a', 'ab') self.assertRaises(ValueError, self.cf.set_option, 'b.c', 1) @@ -188,13 +200,13 @@ 
def test_reset_option_all(self): self.assertEqual(self.cf.get_option('a'), 1) self.assertEqual(self.cf.get_option('b.c'), 'hullo') - def test_deprecate_option(self): import sys - self.cf.deprecate_option('foo') # we can deprecate non-existent options + self.cf.deprecate_option( + 'foo') # we can deprecate non-existent options # testing warning with catch_warning was only added in 2.6 - if sys.version_info[:2]<(2,6): + if sys.version_info[:2] < (2, 6): raise nose.SkipTest() self.assertTrue(self.cf._is_deprecated('foo')) @@ -208,7 +220,8 @@ def test_deprecate_option(self): self.fail("Nonexistent option didn't raise KeyError") self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('deprecated' in str(w[-1])) # we get the default message + self.assertTrue( + 'deprecated' in str(w[-1])) # we get the default message self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2') @@ -220,10 +233,13 @@ def test_deprecate_option(self): self.cf.get_option('a') self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('eprecated' in str(w[-1])) # we get the default message - self.assertTrue('nifty_ver' in str(w[-1])) # with the removal_ver quoted + self.assertTrue( + 'eprecated' in str(w[-1])) # we get the default message + self.assertTrue( + 'nifty_ver' in str(w[-1])) # with the removal_ver quoted - self.assertRaises(KeyError, self.cf.deprecate_option, 'a') # can't depr. twice + self.assertRaises( + KeyError, self.cf.deprecate_option, 'a') # can't depr. twice self.cf.deprecate_option('b.c', 'zounds!') with warnings.catch_warnings(record=True) as w: @@ -231,7 +247,8 @@ def test_deprecate_option(self): self.cf.get_option('b.c') self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('zounds!' in str(w[-1])) # we get the custom message + self.assertTrue( + 'zounds!' 
in str(w[-1])) # we get the custom message # test rerouting keys self.cf.register_option('d.a', 'foo', 'doc2') @@ -245,38 +262,43 @@ def test_deprecate_option(self): self.assertEqual(self.cf.get_option('d.dep'), 'foo') self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('eprecated' in str(w[-1])) # we get the custom message + self.assertTrue( + 'eprecated' in str(w[-1])) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') self.cf.set_option('d.dep', 'baz') # should overwrite "d.a" self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('eprecated' in str(w[-1])) # we get the custom message + self.assertTrue( + 'eprecated' in str(w[-1])) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') self.assertEqual(self.cf.get_option('d.dep'), 'baz') self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue('eprecated' in str(w[-1])) # we get the custom message + self.assertTrue( + 'eprecated' in str(w[-1])) # we get the custom message def test_config_prefix(self): with self.cf.config_prefix("base"): - self.cf.register_option('a',1,"doc1") - self.cf.register_option('b',2,"doc2") + self.cf.register_option('a', 1, "doc1") + self.cf.register_option('b', 2, "doc2") self.assertEqual(self.cf.get_option('a'), 1) self.assertEqual(self.cf.get_option('b'), 2) - self.cf.set_option('a',3) - self.cf.set_option('b',4) + self.cf.set_option('a', 3) + self.cf.set_option('b', 4) self.assertEqual(self.cf.get_option('a'), 3) self.assertEqual(self.cf.get_option('b'), 4) self.assertEqual(self.cf.get_option('base.a'), 3) self.assertEqual(self.cf.get_option('base.b'), 4) - self.assertTrue('doc1' in self.cf.describe_option('base.a',_print_desc=False)) - self.assertTrue('doc2' in self.cf.describe_option('base.b',_print_desc=False)) + self.assertTrue( + 'doc1' in self.cf.describe_option('base.a', _print_desc=False)) + self.assertTrue( + 'doc2' in self.cf.describe_option('base.b', _print_desc=False)) self.cf.reset_option('base.a') self.cf.reset_option('base.b') @@ -286,75 +308,78 @@ def test_config_prefix(self): self.assertEqual(self.cf.get_option('b'), 2) def test_callback(self): - k=[None] - v=[None] + k = [None] + v = [None] + def callback(key): k.append(key) v.append(self.cf.get_option(key)) - self.cf.register_option('d.a', 'foo',cb=callback) - self.cf.register_option('d.b', 'foo',cb=callback) + self.cf.register_option('d.a', 'foo', cb=callback) + self.cf.register_option('d.b', 'foo', cb=callback) - del k[-1],v[-1] - self.cf.set_option("d.a","fooz") - self.assertEqual(k[-1],"d.a") - self.assertEqual(v[-1],"fooz") + del k[-1], v[-1] + self.cf.set_option("d.a", "fooz") + self.assertEqual(k[-1], "d.a") + self.assertEqual(v[-1], "fooz") - del k[-1],v[-1] - self.cf.set_option("d.b","boo") - self.assertEqual(k[-1],"d.b") - self.assertEqual(v[-1],"boo") + del k[-1], v[-1] + self.cf.set_option("d.b", "boo") + self.assertEqual(k[-1], "d.b") + self.assertEqual(v[-1], "boo") - del k[-1],v[-1] + del k[-1], v[-1] self.cf.reset_option("d.b") - self.assertEqual(k[-1],"d.b") - + self.assertEqual(k[-1], "d.b") def test_set_ContextManager(self): def eq(val): - self.assertEqual(self.cf.get_option("a"),val) + self.assertEqual(self.cf.get_option("a"), val) - self.cf.register_option('a',0) + self.cf.register_option('a', 0) eq(0) - with self.cf.option_context("a",15): + with self.cf.option_context("a", 15): eq(15) - with self.cf.option_context("a",25): + with 
self.cf.option_context("a", 25): eq(25) eq(15) eq(0) - self.cf.set_option("a",17) + self.cf.set_option("a", 17) eq(17) def test_attribute_access(self): holder = [] + def f(): - options.b=1 + options.b = 1 + def f2(): - options.display=1 + options.display = 1 + def f3(key): holder.append(True) - self.cf.register_option('a',0) - self.cf.register_option('c',0,cb=f3) - options=self.cf.options + self.cf.register_option('a', 0) + self.cf.register_option('c', 0, cb=f3) + options = self.cf.options - self.assertEqual(options.a,0) - with self.cf.option_context("a",15): - self.assertEqual(options.a,15) + self.assertEqual(options.a, 0) + with self.cf.option_context("a", 15): + self.assertEqual(options.a, 15) - options.a=500 - self.assertEqual(self.cf.get_option("a"),500) + options.a = 500 + self.assertEqual(self.cf.get_option("a"), 500) self.cf.reset_option("a") - self.assertEqual(options.a, self.cf.get_option("a",0)) + self.assertEqual(options.a, self.cf.get_option("a", 0)) - self.assertRaises(KeyError,f) - self.assertRaises(KeyError,f2) + self.assertRaises(KeyError, f) + self.assertRaises(KeyError, f2) # make sure callback kicks when using this form of setting options.c = 1 - self.assertEqual(len(holder),1) + self.assertEqual(len(holder), 1) # fmt.reset_printoptions and fmt.set_printoptions were altered # to use core.config, test_format exercises those paths. diff --git a/pandas/tests/test_factor.py b/pandas/tests/test_factor.py index 550225318e43f..de2fcaa94b59d 100644 --- a/pandas/tests/test_factor.py +++ b/pandas/tests/test_factor.py @@ -17,9 +17,10 @@ class TestCategorical(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.factor = Categorical.from_array(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c']) + 'a', 'c', 'c', 'c']) def test_getitem(self): self.assertEqual(self.factor[0], 'a') @@ -112,6 +113,6 @@ def test_na_flags_int_levels(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + # '--with-coverage', '--cover-package=pandas.core'], exit=False) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 1de2becc709a5..1488f4a1f0f90 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -26,12 +26,15 @@ _frame = DataFrame(tm.getSeriesData()) + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth + class TestDataFrameFormatting(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.warn_filters = warnings.filters warnings.filterwarnings('ignore', @@ -70,7 +73,7 @@ def test_eng_float_formatter(self): def test_repr_tuples(self): buf = StringIO() - df = DataFrame({'tups' : zip(range(10), range(10))}) + df = DataFrame({'tups': zip(range(10), range(10))}) repr(df) df.to_string(col_space=10, buf=buf) @@ -78,8 +81,8 @@ def test_repr_truncation(self): max_len = 20 with option_context("display.max_colwidth", max_len): df = DataFrame({'A': np.random.randn(10), - 'B': [tm.rands(np.random.randint(max_len - 1, - max_len + 1)) for i in range(10)]}) + 'B': [tm.rands(np.random.randint(max_len - 1, + max_len + 1)) for i in range(10)]}) r = repr(df) r = r[r.find('\n') + 1:] @@ -97,7 +100,7 @@ def test_repr_truncation(self): with option_context("display.max_colwidth", max_len + 2): self.assert_('...' 
not in repr(df)) - def test_repr_should_return_str (self): + def test_repr_should_return_str(self): """ http://docs.python.org/py3k/reference/datamodel.html#object.__repr__ http://docs.python.org/reference/datamodel.html#object.__repr__ @@ -106,24 +109,23 @@ def test_repr_should_return_str (self): (str on py2.x, str (unicode) on py3) """ - data=[8,5,3,5] - index1=[u"\u03c3",u"\u03c4",u"\u03c5",u"\u03c6"] - cols=[u"\u03c8"] - df=DataFrame(data,columns=cols,index=index1) - self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 + data = [8, 5, 3, 5] + index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] + cols = [u"\u03c8"] + df = DataFrame(data, columns=cols, index=index1) + self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 def test_repr_no_backslash(self): with option_context('mode.sim_interactive', True): df = DataFrame(np.random.randn(10, 4)) self.assertTrue('\\' not in repr(df)) - def test_to_string_repr_unicode(self): buf = StringIO() unicode_values = [u'\u03c3'] * 10 unicode_values = np.array(unicode_values, dtype=object) - df = DataFrame({'unicode' : unicode_values}) + df = DataFrame({'unicode': unicode_values}) df.to_string(col_space=10, buf=buf) # it works! @@ -146,7 +148,7 @@ def test_to_string_repr_unicode(self): sys.stdin = sys.__stdin__ def test_to_string_unicode_columns(self): - df = DataFrame({u'\u03c3' : np.arange(10.)}) + df = DataFrame({u'\u03c3': np.arange(10.)}) buf = StringIO() df.to_string(buf=buf) @@ -163,7 +165,7 @@ def test_to_string_utf8_columns(self): n = u"\u05d0".encode('utf-8') with option_context('display.max_rows', 1): - df = pd.DataFrame([1,2], columns=[n]) + df = pd.DataFrame([1, 2], columns=[n]) repr(df) def test_to_string_unicode_two(self): @@ -179,8 +181,8 @@ def test_to_string_unicode_three(self): def test_to_string_with_formatters(self): df = DataFrame({'int': [1, 2, 3], 'float': [1.0, 2.0, 3.0], - 'object': [(1,2), True, False]}, - columns=['int', 'float', 'object']) + 'object': [(1, 2), True, False]}, + columns=['int', 'float', 'object']) formatters = [('int', lambda x: '0x%x' % x), ('float', lambda x: '[% 4.1f]' % x), @@ -194,18 +196,18 @@ def test_to_string_with_formatters(self): self.assertEqual(result, result2) def test_to_string_with_formatters_unicode(self): - df = DataFrame({u'c/\u03c3':[1,2,3]}) + df = DataFrame({u'c/\u03c3': [1, 2, 3]}) result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x}) self.assertEqual(result, (u' c/\u03c3\n' - '0 1\n' - '1 2\n' - '2 3')) + '0 1\n' + '1 2\n' + '2 3')) def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({u'c/\u03c3':Series()}) - nonempty = DataFrame({u'c/\u03c3':Series([1,2,3])}) + empty = DataFrame({u'c/\u03c3': Series()}) + nonempty = DataFrame({u'c/\u03c3': Series([1, 2, 3])}) print >>buf, empty print >>buf, nonempty @@ -214,34 +216,34 @@ def test_to_string_buffer_all_unicode(self): buf.getvalue() def test_to_string_with_col_space(self): - df = DataFrame(np.random.random(size=(1,3))) - c10=len(df.to_string(col_space=10).split("\n")[1]) - c20=len(df.to_string(col_space=20).split("\n")[1]) - c30=len(df.to_string(col_space=30).split("\n")[1]) - self.assertTrue( c10 < c20 < c30 ) + df = DataFrame(np.random.random(size=(1, 3))) + c10 = len(df.to_string(col_space=10).split("\n")[1]) + c20 = len(df.to_string(col_space=20).split("\n")[1]) + c30 = len(df.to_string(col_space=30).split("\n")[1]) + self.assertTrue(c10 < c20 < c30) def test_to_html_with_col_space(self): - def check_with_width(df,col_space): + def check_with_width(df, col_space): import re # 
            # check that col_space affects HTML generation
            # and be very brittle about it.
             html = df.to_html(col_space=col_space)
-            hdrs = [x for x in html.split("\n") if re.search("\s]",x)]
-            self.assertTrue(len(hdrs) > 0 )
+            hdrs = [x for x in html.split("\n") if re.search("\s]", x)]
+            self.assertTrue(len(hdrs) > 0)
             for h in hdrs:
-                self.assertTrue("min-width" in h )
-                self.assertTrue(str(col_space) in h )
+                self.assertTrue("min-width" in h)
+                self.assertTrue(str(col_space) in h)

-        df = DataFrame(np.random.random(size=(1,3)))
+        df = DataFrame(np.random.random(size=(1, 3)))

-        check_with_width(df,30)
-        check_with_width(df,50)
+        check_with_width(df, 30)
+        check_with_width(df, 50)

     def test_to_html_unicode(self):
         # it works!
-        df = DataFrame({u'\u03c3' : np.arange(10.)})
+        df = DataFrame({u'\u03c3': np.arange(10.)})
         df.to_html()
-        df = DataFrame({'A' : [u'\u03c3']})
+        df = DataFrame({'A': [u'\u03c3']})
         df.to_html()

     def test_to_html_multiindex_sparsify(self):
@@ -386,7 +388,6 @@ def test_to_html_index_formatter(self):
         """
         self.assertEquals(result, expected)

-
     def test_nonunicode_nonascii_alignment(self):
         df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]])
         rep_str = df.to_string()
@@ -394,19 +395,19 @@ def test_nonunicode_nonascii_alignment(self):
         self.assert_(len(lines[1]) == len(lines[2]))

     def test_unicode_problem_decoding_as_ascii(self):
-        dm = DataFrame({u'c/\u03c3': Series({'test':np.NaN})})
+        dm = DataFrame({u'c/\u03c3': Series({'test': np.NaN})})
         unicode(dm.to_string())

     def test_string_repr_encoding(self):
         pth = curpath()
         filepath = os.path.join(pth, 'data', 'unicode_series.csv')
-        df = pandas.read_csv(filepath, header=None,encoding='latin1')
+        df = pandas.read_csv(filepath, header=None, encoding='latin1')
         repr(df)
         repr(df[1])

     def test_repr_corner(self):
         # representing infs poses no problems
-        df = DataFrame({'foo' : np.inf * np.empty(10)})
+        df = DataFrame({'foo': np.inf * np.empty(10)})
         foo = repr(df)

     def test_frame_info_encoding(self):
@@ -524,10 +525,10 @@ def test_wide_repr_unicode(self):

         reset_option('display.expand_frame_repr')

-
     def test_wide_repr_wide_long_columns(self):
         with option_context('mode.sim_interactive', True):
-            df = DataFrame({'a': ['a'*30, 'b'*30], 'b': ['c'*70, 'd'*80]})
+            df = DataFrame(
+                {'a': ['a' * 30, 'b' * 30], 'b': ['c' * 70, 'd' * 80]})
             result = repr(df)
             self.assertTrue('ccccc' in result)

@@ -538,9 +539,9 @@ def test_to_string(self):
         import re

         # big mixed
-        biggie = DataFrame({'A' : randn(200),
-                            'B' : tm.makeStringIndex(200)},
-                            index=range(200))
+        biggie = DataFrame({'A': randn(200),
+                            'B': tm.makeStringIndex(200)},
+                           index=range(200))
         biggie['A'][:20] = nan
         biggie['B'][:20] = nan

@@ -575,7 +576,7 @@ def test_to_string(self):
         self.assertEqual(header, expected)

         biggie.to_string(columns=['B', 'A'],
-                         formatters={'A' : lambda x: '%.1f' % x})
+                         formatters={'A': lambda x: '%.1f' % x})

         biggie.to_string(columns=['B', 'A'], float_format=str)
         biggie.to_string(columns=['B', 'A'], col_space=12,
@@ -585,8 +586,8 @@ def test_to_string(self):
         frame.to_string()

     def test_to_string_no_header(self):
-        df = DataFrame({'x' : [1, 2, 3],
-                        'y' : [4, 5, 6]})
+        df = DataFrame({'x': [1, 2, 3],
+                        'y': [4, 5, 6]})

         df_s = df.to_string(header=False)
         expected = "0  1  4\n1  2  5\n2  3  6"
@@ -594,8 +595,8 @@ def test_to_string_no_header(self):

         assert(df_s == expected)

     def test_to_string_no_index(self):
-        df = DataFrame({'x' : [1, 2, 3],
-                        'y' : [4, 5, 6]})
+        df = DataFrame({'x': [1, 2, 3],
+                        'y': [4, 5, 6]})

         df_s = df.to_string(index=False)
         expected = " x  y\n 1  4\n 2  5\n 3  6"

@@ -607,13 +608,13 @@ def test_to_string_float_formatting(self):
         fmt.set_printoptions(precision=6, column_space=12,
                              notebook_repr_html=False)
-        df = DataFrame({'x' : [0, 0.25, 3456.000, 12e+45, 1.64e+6,
-                               1.7e+8, 1.253456, np.pi, -1e6]})
+        df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6,
+                              1.7e+8, 1.253456, np.pi, -1e6]})

         df_s = df.to_string()

         # Python 2.5 just wants me to be sad. And debian 32-bit
-        #sys.version_info[0] == 2 and sys.version_info[1] < 6:
+        # sys.version_info[0] == 2 and sys.version_info[1] < 6:
         if _three_digit_exp():
             expected = ('              x\n0  0.00000e+000\n1  2.50000e-001\n'
                         '2  3.45600e+003\n3  1.20000e+046\n4  1.64000e+006\n'
@@ -626,7 +627,7 @@ def test_to_string_float_formatting(self):
                         '8 -1.00000e+06')
         assert(df_s == expected)

-        df = DataFrame({'x' : [3234, 0.253]})
+        df = DataFrame({'x': [3234, 0.253]})
         df_s = df.to_string()

         expected = ('          x\n'
@@ -640,7 +641,7 @@ def test_to_string_float_formatting(self):
         df = DataFrame({'x': [1e9, 0.2512]})
         df_s = df.to_string()
         # Python 2.5 just wants me to be sad. And debian 32-bit
-        #sys.version_info[0] == 2 and sys.version_info[1] < 6:
+        # sys.version_info[0] == 2 and sys.version_info[1] < 6:
         if _three_digit_exp():
             expected = ('               x\n'
                         '0  1.000000e+009\n'
@@ -701,7 +702,7 @@ def test_to_string_ascii_error(self):
         repr(df)

     def test_to_string_int_formatting(self):
-        df = DataFrame({'x' : [-15, 20, 25, -35]})
+        df = DataFrame({'x': [-15, 20, 25, -35]})
         self.assert_(issubclass(df['x'].dtype.type, np.integer))

         output = df.to_string()
@@ -727,7 +728,7 @@ def test_to_string_index_formatter(self):

     def test_to_string_left_justify_cols(self):
         fmt.reset_printoptions()
-        df = DataFrame({'x' : [3234, 0.253]})
+        df = DataFrame({'x': [3234, 0.253]})
         df_s = df.to_string(justify='left')
         expected = ('   x       \n'
                     '0  3234.000\n'
@@ -736,8 +737,8 @@ def test_to_string_left_justify_cols(self):

     def test_to_string_format_na(self):
         fmt.reset_printoptions()
-        df = DataFrame({'A' : [np.nan, -1, -2.1234, 3, 4],
-                        'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
+        df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4],
+                        'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
         result = df.to_string()

         expected = ('        A       B\n'
@@ -748,8 +749,8 @@ def test_to_string_format_na(self):
                     '4  4.0000     bar')
         self.assertEqual(result, expected)

-        df = DataFrame({'A' : [np.nan, -1., -2., 3., 4.],
-                        'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
+        df = DataFrame({'A': [np.nan, -1., -2., 3., 4.],
+                        'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
         result = df.to_string()

         expected = ('     A       B\n'
@@ -762,9 +763,9 @@ def test_to_string_format_na(self):

     def test_to_html(self):
         # big mixed
-        biggie = DataFrame({'A' : randn(200),
-                            'B' : tm.makeStringIndex(200)},
-                            index=range(200))
+        biggie = DataFrame({'A': randn(200),
+                            'B': tm.makeStringIndex(200)},
+                           index=range(200))
         biggie['A'][:20] = nan
         biggie['B'][:20] = nan

@@ -779,7 +780,7 @@ def test_to_html(self):
         biggie.to_html(columns=['B', 'A'], col_space=17)
         biggie.to_html(columns=['B', 'A'],
-                       formatters={'A' : lambda x: '%.1f' % x})
+                       formatters={'A': lambda x: '%.1f' % x})

         biggie.to_html(columns=['B', 'A'], float_format=str)
         biggie.to_html(columns=['B', 'A'], col_space=12,
@@ -958,12 +959,12 @@ def test_to_html_index(self):
                         'B': [1.2, 3.4, 5.6],
                         'C': ['one', 'two', np.NaN]},
                        columns=['A', 'B', 'C'],
-                       index = index)
+                       index=index)
         result = df.to_html(index=False)
         for i in index:
             self.assert_(i not in result)

-        tuples = [('foo', 'car'), ('foo', 'bike'), ('bar' ,'car')]
+        tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')]
         df.index = pandas.MultiIndex.from_tuples(tuples)
         result = df.to_html(index=False)
         for i in ['foo', 'bar', 'car', 'bike']:
@@ -982,9 +983,9 @@ def test_repr_html(self):

     def test_fake_qtconsole_repr_html(self):
         def get_ipython():
-            return {'config' :
-                    {'KernelApp' :
-                     {'parent_appname' : 'ipython-qtconsole'}}}
+            return {'config':
+                    {'KernelApp':
+                     {'parent_appname': 'ipython-qtconsole'}}}

         repstr = self.frame._repr_html_()
         self.assert_(repstr is not None)
@@ -1027,7 +1028,7 @@ def test_float_trim_zeros(self):
                 skip = False

     def test_dict_entries(self):
-        df = DataFrame({'A': [{'a':1, 'b':2}]})
+        df = DataFrame({'A': [{'a': 1, 'b': 2}]})

         val = df.to_string()
         self.assertTrue("'a': 1" in val)
@@ -1037,8 +1038,10 @@ def test_to_latex(self):
         # it works!
         self.frame.to_latex()

+
 class TestSeriesFormatting(unittest.TestCase):
     _multiprocess_can_split_ = True
+
     def setUp(self):
         self.ts = tm.makeTimeSeries()

@@ -1126,9 +1129,9 @@ def test_to_string_float_na_spacing(self):
         self.assertEqual(result, expected)

     def test_unicode_name_in_footer(self):
-        s=Series([1,2],name=u'\u05e2\u05d1\u05e8\u05d9\u05ea')
-        sf=fmt.SeriesFormatter(s,name=u'\u05e2\u05d1\u05e8\u05d9\u05ea')
-        sf._get_footer() # should not raise exception
+        s = Series([1, 2], name=u'\u05e2\u05d1\u05e8\u05d9\u05ea')
+        sf = fmt.SeriesFormatter(s, name=u'\u05e2\u05d1\u05e8\u05d9\u05ea')
+        sf._get_footer()  # should not raise exception

     def test_float_trim_zeros(self):
         vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10,
@@ -1141,8 +1144,8 @@ def test_float_trim_zeros(self):

     def test_timedelta64(self):
         Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string()
-        #check this works
-        #GH2146
+        # check this works
+        # GH2146

     def test_mixed_datetime64(self):
         df = DataFrame({'A': [1, 2],
@@ -1152,10 +1155,12 @@ def test_mixed_datetime64(self):
         result = repr(df.ix[0])
         self.assertTrue('2012-01-01' in result)

+
 class TestEngFormatter(unittest.TestCase):
     _multiprocess_can_split_ = True
+
     def test_eng_float_formatter(self):
-        df = DataFrame({'A' : [1.41, 141., 14100, 1410000.]})
+        df = DataFrame({'A': [1.41, 141., 14100, 1410000.]})
         fmt.set_eng_float_format()

         result = df.to_string()
@@ -1351,9 +1356,11 @@ def test_rounding(self):
         result = formatter(0)
         self.assertEqual(result, u' 0.000')

+
 def _three_digit_exp():
     return '%.4g' % 1.7e8 == '1.7e+008'

+
 class TestFloatArrayFormatter(unittest.TestCase):

     def test_misc(self):
@@ -1362,5 +1369,5 @@ def test_misc(self):

 if __name__ == '__main__':
     import nose
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 81f4055abcf7b..80a55db84f0e8 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -35,6 +35,7 @@

 from numpy.testing.decorators import slow

+
 def _skip_if_no_scipy():
     try:
         import scipy.stats
@@ -46,6 +47,7 @@ def _skip_if_no_scipy():

 JOIN_TYPES = ['inner', 'outer', 'left', 'right']

+
 class CheckIndexing(object):
     _multiprocess_can_split_ = True

@@ -69,7 +71,7 @@ def test_getitem(self):
         self.assertRaises(Exception, self.frame.__getitem__, 'random')

     def test_getitem_dupe_cols(self):
-        df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b'])
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
         try:
             df[['baf']]
         except KeyError:
@@ -159,13 +161,14 @@ def test_getitem_boolean(self):
         # test df[df >0] works
         bif = self.tsframe[self.tsframe > 0]
         bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan),
-                         index=self.tsframe.index,columns=self.tsframe.columns)
-        self.assert_(isinstance(bif,DataFrame))
+                         index=self.tsframe.index, columns=self.tsframe.columns)
+        self.assert_(isinstance(bif, DataFrame))
         self.assert_(bif.shape == self.tsframe.shape)
-        assert_frame_equal(bif,bifw)
+        assert_frame_equal(bif, bifw)

     def test_getitem_boolean_list(self):
-        df = DataFrame(np.arange(12).reshape(3,4))
+        df = DataFrame(np.arange(12).reshape(3, 4))
+
         def _checkit(lst):
             result = df[lst]
             expected = df.ix[df.index[lst]]
@@ -225,7 +228,7 @@ def test_getitem_setitem_ix_negative_integers(self):
         self.assert_(isnull(df.ix[:, [-1]].values).all())

         # #1942
-        a = DataFrame(randn(20,2), index=[chr(x+65) for x in range(20)])
+        a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)])
         a.ix[-1] = a.ix[-2]

         assert_series_equal(a.ix[-1], a.ix[-2])
@@ -236,7 +239,7 @@ def test_getattr(self):
                           'NONEXISTENT_NAME')

     def test_setattr_column(self):
-        df = DataFrame({'foobar' : 1}, index=range(10))
+        df = DataFrame({'foobar': 1}, index=range(10))

         df.foobar = 5
         self.assert_((df.foobar == 5).all())
@@ -247,12 +250,12 @@ def test_setitem(self):
         self.frame['col5'] = series
         self.assert_('col5' in self.frame)
         tm.assert_dict_equal(series, self.frame['col5'],
-                            compare_keys=False)
+                             compare_keys=False)

         series = self.frame['A']
         self.frame['col6'] = series
         tm.assert_dict_equal(series, self.frame['col6'],
-                            compare_keys=False)
+                             compare_keys=False)

         self.assertRaises(Exception, self.frame.__setitem__,
                           randn(len(self.frame) + 1))
@@ -357,9 +360,9 @@ def test_setitem_boolean_column(self):

     def test_setitem_corner(self):
         # corner case
-        df = DataFrame({'B' : [1., 2., 3.],
-                        'C' : ['a', 'b', 'c']},
-                        index=np.arange(3))
+        df = DataFrame({'B': [1., 2., 3.],
+                        'C': ['a', 'b', 'c']},
+                       index=np.arange(3))
         del df['B']

         df['B'] = [1., 2., 3.]
         self.assert_('B' in df)
@@ -396,8 +399,8 @@ def test_setitem_corner(self):
         self.assertEqual(dm['coercable'].dtype, np.object_)

     def test_setitem_corner2(self):
-        data = {"title" : ['foobar','bar','foobar'] + ['foobar'] * 17 ,
-                "cruft" : np.random.random(20)}
+        data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17,
+                "cruft": np.random.random(20)}

         df = DataFrame(data)
         ix = df[df['title'] == 'bar'].index
@@ -405,8 +408,8 @@ def test_setitem_corner2(self):
         df.ix[ix, ['title']] = 'foobar'
         df.ix[ix, ['cruft']] = 0

-        assert( df.ix[1, 'title'] == 'foobar' )
-        assert( df.ix[1, 'cruft'] == 0 )
+        assert(df.ix[1, 'title'] == 'foobar')
+        assert(df.ix[1, 'cruft'] == 0)

     def test_setitem_ambig(self):
         # difficulties with mixed-type data
@@ -435,7 +438,7 @@ def test_setitem_ambig(self):
     def test_setitem_clear_caches(self):
         # GH #304
         df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]},
-                       index=[0,1,2,3])
+                       index=[0, 1, 2, 3])
         df.insert(2, 'z', np.nan)

         # cache it
@@ -628,15 +631,14 @@ def test_setitem_fancy_2d(self):
         assert_frame_equal(frame, expected)

         # new corner case of boolean slicing / setting
-        frame = DataFrame(zip([2,3,9,6,7], [np.nan]*5),
-                          columns=['a','b'])
+        frame = DataFrame(zip([2, 3, 9, 6, 7], [np.nan] * 5),
+                          columns=['a', 'b'])
         lst = [100]
-        lst.extend([np.nan]*4)
-        expected = DataFrame(zip([100,3,9,6,7], lst), columns=['a','b'])
+        lst.extend([np.nan] * 4)
+        expected = DataFrame(zip([100, 3, 9, 6, 7], lst), columns=['a', 'b'])
         frame[frame['a'] == 2] = 100
         assert_frame_equal(frame, expected)

-
     def test_fancy_getitem_slice_mixed(self):
         sliced = self.mixed_frame.ix[:, -3:]
         self.assert_(sliced['D'].dtype == np.float64)
@@ -804,7 +806,7 @@ def test_ix_assign_column_mixed(self):

     def test_ix_multi_take(self):
         df = DataFrame(np.random.randn(3, 2))
-        rs = df.ix[df.index==0, :]
+        rs = df.ix[df.index == 0, :]
         xp = df.reindex([0])
         assert_frame_equal(rs, xp)
@@ -816,15 +818,15 @@ def test_ix_multi_take(self):
        """

    def test_ix_multi_take_nonint_index(self):
-        df = DataFrame(np.random.randn(3, 2), index=['x','y','z'],
-                       columns=['a','b'])
+        df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
+                       columns=['a', 'b'])
        rs = df.ix[[0], [0]]
        xp = df.reindex(['x'], columns=['a'])
        assert_frame_equal(rs, xp)

    def test_ix_multi_take_multiindex(self):
-        df = DataFrame(np.random.randn(3, 2), index=['x','y','z'],
-                       columns=[['a','b'], ['1','2']])
+        df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
+                       columns=[['a', 'b'], ['1', '2']])
        rs = df.ix[[0], [0]]
        xp = df.reindex(['x'], columns=[('a', '1')])
        assert_frame_equal(rs, xp)
@@ -842,7 +844,6 @@ def test_ix_dup(self):
         sub = df.ix['b':'d']
         assert_frame_equal(sub, df.ix[2:])

-
     def test_getitem_fancy_1d(self):
         f = self.frame
         ix = f.ix
@@ -953,7 +954,7 @@ def test_setitem_fancy_scalar(self):
             for idx in f.index[::5]:
                 i = f.index.get_loc(idx)
                 val = randn()
-                expected.values[i,j] = val
+                expected.values[i, j] = val
                 ix[idx, col] = val
         assert_frame_equal(f, expected)

@@ -998,8 +999,8 @@ def test_setitem_fancy_boolean(self):
         assert_frame_equal(frame, expected)

     def test_getitem_fancy_ints(self):
-        result = self.frame.ix[[1,4,7]]
-        expected = self.frame.ix[self.frame.index[[1,4,7]]]
+        result = self.frame.ix[[1, 4, 7]]
+        expected = self.frame.ix[self.frame.index[[1, 4, 7]]]
         assert_frame_equal(result, expected)

         result = self.frame.ix[:, [2, 0, 1]]
@@ -1081,33 +1082,35 @@ def test_setitem_single_column_mixed_datetime(self):

         # check our dtypes
         result = df.get_dtype_counts()
-        expected = Series({ 'float64' : 3, 'datetime64[ns]' : 1})
+        expected = Series({'float64': 3, 'datetime64[ns]': 1})
         assert_series_equal(result, expected)

         # set an allowable datetime64 type
         from pandas import tslib
-        df.ix['b','timestamp'] = tslib.iNaT
-        self.assert_(com.isnull(df.ix['b','timestamp']))
+        df.ix['b', 'timestamp'] = tslib.iNaT
+        self.assert_(com.isnull(df.ix['b', 'timestamp']))

         # allow this syntax
-        df.ix['c','timestamp'] = nan
-        self.assert_(com.isnull(df.ix['c','timestamp']))
+        df.ix['c', 'timestamp'] = nan
+        self.assert_(com.isnull(df.ix['c', 'timestamp']))

         # allow this syntax
-        df.ix['d',:] = nan
-        self.assert_(com.isnull(df.ix['c',:]).all() == False)
+        df.ix['d', :] = nan
+        self.assert_(com.isnull(df.ix['c', :]).all() == False)

         # try to set with a list like item
-        self.assertRaises(Exception, df.ix.__setitem__, ('d','timestamp'), [nan])
+        self.assertRaises(
+            Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])

         # prior to 0.10.1 this failed
-        #self.assertRaises(TypeError, df.ix.__setitem__, ('c','timestamp'), nan)
+        # self.assertRaises(TypeError, df.ix.__setitem__, ('c','timestamp'),
+        # nan)

     def test_setitem_frame(self):
         piece = self.frame.ix[:2, ['A', 'B']]
         self.frame.ix[-2:, ['A', 'B']] = piece.values
         assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
-                           piece.values)
+                            piece.values)

         piece = self.mixed_frame.ix[:2, ['A', 'B']]
         f = self.mixed_frame.ix.__setitem__
@@ -1120,7 +1123,7 @@ def test_setitem_frame_align(self):
         piece.columns = ['A', 'B']
         self.frame.ix[-2:, ['A', 'B']] = piece
         assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
-                           piece.values)
+                            piece.values)

     def test_setitem_fancy_exceptions(self):
         pass
@@ -1177,7 +1180,7 @@ def test_getitem_setitem_ix_bool_keyerror(self):

     def test_getitem_list_duplicates(self):
         # #1943
-        df = DataFrame(np.random.randn(4,4), columns=list('AABC'))
+        df = DataFrame(np.random.randn(4, 4), columns=list('AABC'))
         df.columns.name = 'foo'

         result = df[['B', 'C']]
@@ -1194,9 +1197,9 @@ def test_get_value(self):
                 assert_almost_equal(result, expected)

     def test_iteritems(self):
-        df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b'])
-        for k,v in df.iteritems():
-            self.assertEqual(type(v),Series)
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
+        for k, v in df.iteritems():
+            self.assertEqual(type(v), Series)

     def test_lookup(self):
         def alt(df, rows, cols):
@@ -1215,10 +1218,10 @@ def testit(df):
         testit(self.mixed_frame)
         testit(self.frame)

-        df = DataFrame({'label' : ['a', 'b', 'a', 'c'],
-                        'mask_a' : [True, True, False, True],
-                        'mask_b' : [True, False, False, False],
-                        'mask_c' : [False, True, False, True]})
+        df = DataFrame({'label': ['a', 'b', 'a', 'c'],
+                        'mask_a': [True, True, False, True],
+                        'mask_b': [True, False, False, False],
+                        'mask_c': [False, True, False, True]})
         df['mask'] = df.lookup(df.index, 'mask_' + df['label'])
         exp_mask = alt(df, df.index, 'mask_' + df['label'])
         assert_almost_equal(df['mask'], exp_mask)
@@ -1260,7 +1263,7 @@ def test_set_value_resize(self):
         self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')

     def test_set_value_with_index_dtype_change(self):
-        df = DataFrame(randn(3,3), index=range(3), columns=list('ABC'))
+        df = DataFrame(randn(3, 3), index=range(3), columns=list('ABC'))
         res = df.set_value('C', 2, 1.0)
         self.assert_(list(res.index) == list(df.index) + ['C'])
         self.assert_(list(res.columns) == list(df.columns) + [2])
@@ -1333,7 +1336,7 @@ def test_icol(self):
             assert_frame_equal(result, expected)

     def test_irow_icol_duplicates(self):
-        df = DataFrame(np.random.rand(3,3), columns=list('ABC'),
+        df = DataFrame(np.random.rand(3, 3), columns=list('ABC'),
                        index=list('aab'))

         result = df.irow(0)
@@ -1348,10 +1351,10 @@ def test_irow_icol_duplicates(self):
         assert_almost_equal(result.values, df.values[0])
         assert_series_equal(result, result2)

-        #multiindex
+        # multiindex
         df = DataFrame(np.random.randn(3, 3),
                        columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
-                       index = [['i', 'i', 'j'], ['X', 'X', 'Y']])
+                       index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
         rs = df.irow(0)
         xp = df.ix[0]
         assert_series_equal(rs, xp)
@@ -1365,15 +1368,15 @@ def test_irow_icol_duplicates(self):
         assert_frame_equal(rs, xp)

         # #2259
-        df = DataFrame([[1,2,3],[4,5,6]], columns=[1,1,2])
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
         result = df.icol([0])
         expected = df.take([0], axis=1)
         assert_frame_equal(result, expected)

     def test_icol_sparse_propegate_fill_value(self):
         from pandas.sparse.api import SparseDataFrame
-        df=SparseDataFrame({'A' : [999,1]},default_fill_value=999)
-        self.assertTrue( len(df['A'].sp_values) == len(df.icol(0).sp_values))
+        df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)
+        self.assertTrue(len(df['A'].sp_values) == len(df.icol(0).sp_values))

     def test_iget_value(self):
         for i, row in enumerate(self.frame.index):
@@ -1387,15 +1390,16 @@ def test_nested_exception(self):
         # (which may get fixed), it's just a way to trigger
         # the issue or reraising an outer exception without
         # a named argument
-        df=DataFrame({"a":[1,2,3],"b":[4,5,6],"c":[7,8,9]}).set_index(["a","b"])
-        l=list(df.index)
-        l[0]=["a","b"]
-        df.index=l
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8,
+                       9]}).set_index(["a", "b"])
+        l = list(df.index)
+        l[0] = ["a", "b"]
+        df.index = l

         try:
             repr(df)
-        except Exception,e:
-            self.assertNotEqual(type(e),UnboundLocalError)
+        except Exception, e:
+            self.assertNotEqual(type(e), UnboundLocalError)

 _seriesd = tm.getSeriesData()
 _tsd = tm.getTimeSeriesData()
@@ -1410,6 +1414,7 @@ def test_nested_exception(self):
 _mixed_frame = _frame.copy()
 _mixed_frame['foo'] = 'bar'

+
 class SafeForSparse(object):
     _multiprocess_can_split_ = True

@@ -1552,10 +1557,10 @@ def setUp(self):
         self.ts4 = tm.makeTimeSeries()[1:-1]

         self.ts_dict = {
-            'col1' : self.ts1,
-            'col2' : self.ts2,
-            'col3' : self.ts3,
-            'col4' : self.ts4,
+            'col1': self.ts1,
+            'col2': self.ts2,
+            'col3': self.ts3,
+            'col4': self.ts4,
         }
         self.empty = DataFrame({})

@@ -1564,7 +1569,7 @@ def setUp(self):
                         [7., 8., 9.]])

         self.simple = DataFrame(arr, columns=['one', 'two', 'three'],
-                                 index=['a', 'b', 'c'])
+                                index=['a', 'b', 'c'])

     def test_get_axis(self):
         self.assert_(DataFrame._get_axis_name(0) == 'index')
@@ -1590,16 +1595,16 @@ def test_set_index(self):
         # cache it
         _ = self.mixed_frame['foo']
         self.mixed_frame.index = idx
-        self.assert_(self.mixed_frame['foo'].index  is idx)
+        self.assert_(self.mixed_frame['foo'].index is idx)
         self.assertRaises(Exception, setattr, self.mixed_frame, 'index',
                           idx[::2])

     def test_set_index2(self):
-        df = DataFrame({'A' : ['foo', 'foo', 'foo', 'bar', 'bar'],
-                        'B' : ['one', 'two', 'three', 'one', 'two'],
-                        'C' : ['a', 'b', 'c', 'd', 'e'],
-                        'D' : np.random.randn(5),
-                        'E' : np.random.randn(5)})
+        df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
+                        'B': ['one', 'two', 'three', 'one', 'two'],
+                        'C': ['a', 'b', 'c', 'd', 'e'],
+                        'D': np.random.randn(5),
+                        'E': np.random.randn(5)})

         # new object, single-column
         result = df.set_index('C')
@@ -1676,31 +1681,31 @@ def test_set_index2(self):
         self.assertEqual(result.index.name, 'C')

     def test_set_index_nonuniq(self):
-        df = DataFrame({'A' : ['foo', 'foo', 'foo', 'bar', 'bar'],
-                        'B' : ['one', 'two', 'three', 'one', 'two'],
-                        'C' : ['a', 'b', 'c', 'd', 'e'],
-                        'D' : np.random.randn(5),
-                        'E' : np.random.randn(5)})
+        df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
+                        'B': ['one', 'two', 'three', 'one', 'two'],
+                        'C': ['a', 'b', 'c', 'd', 'e'],
+                        'D': np.random.randn(5),
+                        'E': np.random.randn(5)})
         self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True,
                           inplace=True)
         self.assert_('A' in df)

     def test_set_index_bug(self):
-        #GH1590
-        df = DataFrame({'val' : [0, 1, 2], 'key': ['a', 'b', 'c']})
-        df2 = df.select(lambda indx:indx>=1)
+        # GH1590
+        df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']})
+        df2 = df.select(lambda indx: indx >= 1)
         rs = df2.set_index('key')
         xp = DataFrame({'val': [1, 2]},
                        Index(['b', 'c'], name='key'))
         assert_frame_equal(rs, xp)

     def test_set_index_pass_arrays(self):
-        df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-                               'foo', 'bar', 'foo', 'foo'],
-                        'B' : ['one', 'one', 'two', 'three',
-                               'two', 'two', 'one', 'three'],
-                        'C' : np.random.randn(8),
-                        'D' : np.random.randn(8)})
+        df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                              'foo', 'bar', 'foo', 'foo'],
+                        'B': ['one', 'one', 'two', 'three',
+                              'two', 'two', 'one', 'three'],
+                        'C': np.random.randn(8),
+                        'D': np.random.randn(8)})

         # multiple columns
         result = df.set_index(['A', df['B'].values], drop=False)
@@ -1708,9 +1713,9 @@ def test_set_index_pass_arrays(self):
         assert_frame_equal(result, expected)

     def test_set_index_cast_datetimeindex(self):
-        df = DataFrame({'A' : [datetime(2000, 1, 1) + timedelta(i)
-                               for i in range(1000)],
-                        'B' : np.random.randn(1000)})
+        df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
+                              for i in range(1000)],
+                        'B': np.random.randn(1000)})
         idf = df.set_index('A')
         self.assert_(isinstance(idf.index, DatetimeIndex))
@@ -1727,11 +1732,11 @@ def test_set_index_multiindexcolumns(self):

     def test_set_index_empty_column(self):
         # #1971
         df = DataFrame([
-                dict(a=1, p=0),
-                dict(a=2, m=10),
-                dict(a=3, m=11, p=20),
-                dict(a=4, m=12, p=21)
-                ], columns=('a', 'm', 'p', 'x'))
+            dict(a=1, p=0),
+            dict(a=2, m=10),
+            dict(a=3, m=11, p=20),
+            dict(a=4, m=12, p=21)
+        ], columns=('a', 'm', 'p', 'x'))

         # it works!
         result = df.set_index(['a', 'x'])
@@ -1809,12 +1814,12 @@ def test_constructor_rec(self):
         assert_frame_equal(df3, expected)

     def test_constructor_bool(self):
-        df = DataFrame({0 : np.ones(10, dtype=bool),
-                        1 : np.zeros(10, dtype=bool)})
+        df = DataFrame({0: np.ones(10, dtype=bool),
+                        1: np.zeros(10, dtype=bool)})
         self.assertEqual(df.values.dtype, np.bool_)

     def test_constructor_overflow_int64(self):
-        values = np.array([2**64 - i for i in range(1, 10)],
+        values = np.array([2 ** 64 - i for i in range(1, 10)],
                           dtype=np.uint64)

         result = DataFrame({'a': values})
@@ -1825,12 +1830,11 @@ def test_constructor_overflow_int64(self):
                        (8921811264899370420, 45), (17019687244989530680L, 270),
                        (9930107427299601010L, 273)]
         dtype = [('uid', 'u8'), ('score', 'u8')]
-        data = np.zeros((len(data_scores),),dtype=dtype)
+        data = np.zeros((len(data_scores),), dtype=dtype)
         data[:] = data_scores
         df_crawls = DataFrame(data)
         self.assert_(df_crawls['uid'].dtype == object)

-
     def test_is_mixed_type(self):
         self.assert_(not self.frame._is_mixed_type)
         self.assert_(self.mixed_frame._is_mixed_type)
@@ -1840,20 +1844,20 @@ def test_constructor_ordereddict(self):
         nitems = 100
         nums = range(nitems)
         random.shuffle(nums)
-        expected=['A%d' %i for i in nums]
-        df=DataFrame(OrderedDict(zip(expected,[[0]]*nitems)))
-        self.assertEqual(expected,list(df.columns))
+        expected = ['A%d' % i for i in nums]
+        df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems)))
+        self.assertEqual(expected, list(df.columns))

     def test_constructor_dict(self):
-        frame = DataFrame({'col1' : self.ts1,
-                           'col2' : self.ts2})
+        frame = DataFrame({'col1': self.ts1,
+                           'col2': self.ts2})

         tm.assert_dict_equal(self.ts1, frame['col1'], compare_keys=False)
         tm.assert_dict_equal(self.ts2, frame['col2'], compare_keys=False)

-        frame = DataFrame({'col1' : self.ts1,
-                           'col2' : self.ts2},
-                          columns=['col2', 'col3', 'col4'])
+        frame = DataFrame({'col1': self.ts1,
+                           'col2': self.ts2},
+                          columns=['col2', 'col3', 'col4'])

         self.assertEqual(len(frame), len(self.ts2))
         self.assert_('col1' not in frame)
@@ -1865,12 +1869,11 @@ def test_constructor_dict(self):

         # mix dict and array, wrong size
         self.assertRaises(Exception, DataFrame,
-                          {'A' : {'a' : 'a', 'b' : 'b'},
-                           'B' : ['a', 'b', 'c']})
-
+                          {'A': {'a': 'a', 'b': 'b'},
+                           'B': ['a', 'b', 'c']})

         # Length-one dict micro-optimization
-        frame = DataFrame({'A' : {'1' : 1, '2' : 2}})
+        frame = DataFrame({'A': {'1': 1, '2': 2}})
         self.assert_(np.array_equal(frame.index, ['1', '2']))

         # empty dict plus index
@@ -1886,7 +1889,7 @@ def test_constructor_dict(self):
         self.assertEqual(len(frame._series), 3)

         # with dict of empty list and Series
-        frame = DataFrame({'A' : [], 'B' : []}, columns=['A', 'B'])
+        frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B'])
         self.assert_(frame.index.equals(Index([])))

     def test_constructor_subclass_dict(self):
@@ -1915,15 +1918,15 @@ def test_constructor_subclass_dict(self):

     def test_constructor_dict_block(self):
         expected = [[4., 3., 2., 1.]]
-        df = DataFrame({'d' : [4.],'c' : [3.],'b' : [2.],'a' : [1.]},
+        df = DataFrame({'d': [4.], 'c': [3.], 'b': [2.], 'a': [1.]},
                        columns=['d', 'c', 'b', 'a'])
         assert_almost_equal(df.values, expected)

     def test_constructor_dict_cast(self):
         # cast float tests
         test_data = {
-            'A' : {'1' : 1, '2' : 2},
-            'B' : {'1' : '1', '2' : '2', '3' : '3'},
+            'A': {'1': 1, '2': 2},
+            'B': {'1': '1', '2': '2', '3': '3'},
         }
         frame = DataFrame(test_data, dtype=float)
         self.assertEqual(len(frame), 3)
@@ -1937,8 +1940,8 @@ def test_constructor_dict_cast(self):

         # can't cast to float
         test_data = {
-            'A' : dict(zip(range(20), tm.makeStringIndex(20))),
-            'B' : dict(zip(range(15), randn(15)))
+            'A': dict(zip(range(20), tm.makeStringIndex(20))),
+            'B': dict(zip(range(15), randn(15)))
         }
         frame = DataFrame(test_data, dtype=float)
         self.assertEqual(len(frame), 20)
@@ -1950,7 +1953,7 @@ def test_constructor_dict_dont_upcast(self):
         df = DataFrame(d)
         self.assert_(isinstance(df['Col1']['Row2'], float))

-        dm = DataFrame([[1,2],['a','b']], index=[1,2], columns=[1,2])
+        dm = DataFrame([[1, 2], ['a', 'b']], index=[1, 2], columns=[1, 2])
         self.assert_(isinstance(dm[1][1], int))

     def test_constructor_dict_of_tuples(self):
@@ -1972,7 +1975,7 @@ def test_constructor_ndarray(self):

         # cast type
         frame = DataFrame(mat, columns=['A', 'B', 'C'],
-                         index=[1, 2], dtype=int)
+                          index=[1, 2], dtype=int)
         self.assert_(frame.values.dtype == np.int64)

         # 1-D input
@@ -2024,13 +2027,13 @@ def test_constructor_maskedarray(self):

         # cast type
         frame = DataFrame(mat, columns=['A', 'B', 'C'],
-                         index=[1, 2], dtype=int)
+                          index=[1, 2], dtype=int)
         self.assert_(frame.values.dtype == np.int64)

         # Check non-masked values
         mat2 = ma.copy(mat)
-        mat2[0,0] = 1.0
-        mat2[1,2] = 2.0
+        mat2[0, 0] = 1.0
+        mat2[1, 2] = 2.0
         frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2])
         self.assertEqual(1.0, frame['A'][1])
         self.assertEqual(2.0, frame['C'][2])
@@ -2087,8 +2090,8 @@ def test_constructor_maskedarray_nonfloat(self):

         # Check non-masked values
         mat2 = ma.copy(mat)
-        mat2[0,0] = 1
-        mat2[1,2] = 2
+        mat2[0, 0] = 1
+        mat2[1, 2] = 2
         frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2])
         self.assertEqual(1, frame['A'][1])
         self.assertEqual(2, frame['C'][2])
@@ -2104,13 +2107,13 @@ def test_constructor_maskedarray_nonfloat(self):

         # cast type
         frame = DataFrame(mat, columns=['A', 'B', 'C'],
-                         index=[1, 2], dtype=np.int64)
+                          index=[1, 2], dtype=np.int64)
         self.assert_(frame.values.dtype == np.int64)

         # Check non-masked values
         mat2 = ma.copy(mat)
-        mat2[0,0] = 1
-        mat2[1,2] = 2
+        mat2[0, 0] = 1
+        mat2[1, 2] = 2
         frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2])
         self.assertEqual(1, frame['A'].view('i8')[1])
         self.assertEqual(2, frame['C'].view('i8')[2])
@@ -2126,13 +2129,13 @@ def test_constructor_maskedarray_nonfloat(self):

         # cast type
         frame = DataFrame(mat, columns=['A', 'B', 'C'],
-                         index=[1, 2], dtype=object)
+                          index=[1, 2], dtype=object)
         self.assert_(frame.values.dtype == object)

         # Check non-masked values
         mat2 = ma.copy(mat)
-        mat2[0,0] = True
-        mat2[1,2] = False
+        mat2[0, 0] = True
+        mat2[1, 2] = False
         frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2])
         self.assertEqual(True, frame['A'][1])
         self.assertEqual(False, frame['C'][2])
@@ -2142,11 +2145,11 @@ def test_constructor_corner(self):
         self.assertEqual(df.values.shape, (0, 0))

         # empty but with specified dtype
-        df = DataFrame(index=range(10), columns=['a','b'], dtype=object)
+        df = DataFrame(index=range(10), columns=['a', 'b'], dtype=object)
         self.assert_(df.values.dtype == np.object_)

         # does not error but ends up float
-        df = DataFrame(index=range(10), columns=['a','b'], dtype=int)
+        df = DataFrame(index=range(10), columns=['a', 'b'], dtype=int)
         self.assert_(df.values.dtype == np.object_)

         # #1783 empty dtype object
@@ -2154,8 +2157,8 @@ def test_constructor_corner(self):
         self.assert_(df.values.dtype == np.object_)

     def test_constructor_scalar_inference(self):
-        data = {'int' : 1, 'bool' : True,
-                'float' : 3., 'complex': 4j, 'object' : 'foo'}
+        data = {'int': 1, 'bool': True,
+                'float': 3., 'complex': 4j, 'object': 'foo'}
         df = DataFrame(data, index=np.arange(10))
         self.assert_(df['int'].dtype == np.int64)
@@ -2212,9 +2215,9 @@ def test_constructor_more(self):
         tm.assert_frame_equal(dm, self.frame)

         # int cast
-        dm = DataFrame({'A' : np.ones(10, dtype=int),
-                        'B' : np.ones(10, dtype=float)},
-                       index=np.arange(10))
+        dm = DataFrame({'A': np.ones(10, dtype=int),
+                        'B': np.ones(10, dtype=float)},
+                       index=np.arange(10))

         self.assertEqual(len(dm.columns), 2)
         self.assert_(dm.values.dtype == np.float64)
@@ -2232,12 +2235,12 @@ def test_constructor_list_of_lists(self):
         self.assert_(df['str'].dtype == np.object_)

     def test_constructor_list_of_dicts(self):
-        data = [OrderedDict([['a', 1.5], ['b', 3], ['c',4], ['d',6]]),
-                OrderedDict([['a', 1.5], ['b', 3], ['d',6]]),
-                OrderedDict([['a', 1.5],[ 'd',6]]),
+        data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
+                OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
+                OrderedDict([['a', 1.5], ['d', 6]]),
                 OrderedDict(),
-                OrderedDict([['a', 1.5], ['b', 3],[ 'c',4]]),
-                OrderedDict([['b', 3], ['c',4], ['d',6]])]
+                OrderedDict([['a', 1.5], ['b', 3], ['c', 4]]),
+                OrderedDict([['b', 3], ['c', 4], ['d', 6]])]

         result = DataFrame(data)
         expected = DataFrame.from_dict(dict(zip(range(len(data)), data)),
@@ -2249,8 +2252,8 @@

         assert_frame_equal(result, expected)

     def test_constructor_list_of_series(self):
-        data = [OrderedDict([['a', 1.5],[ 'b', 3.0],[ 'c',4.0]]),
-                OrderedDict([['a', 1.5], ['b', 3.0],[ 'c',6.0]])]
+        data = [OrderedDict([['a', 1.5], ['b', 3.0], ['c', 4.0]]),
+                OrderedDict([['a', 1.5], ['b', 3.0], ['c', 6.0]])]
         sdict = OrderedDict(zip(['x', 'y'], data))
         idx = Index(['a', 'b', 'c'])
@@ -2271,12 +2274,12 @@

         assert_frame_equal(result.sort_index(), expected)

         # none named
-        data = [OrderedDict([['a', 1.5], ['b', 3], ['c',4], ['d',6]]),
-                OrderedDict([['a', 1.5], ['b', 3], ['d',6]]),
-                OrderedDict([['a', 1.5],[ 'd',6]]),
+        data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
+                OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
+                OrderedDict([['a', 1.5], ['d', 6]]),
                 OrderedDict(),
-                OrderedDict([['a', 1.5], ['b', 3],[ 'c',4]]),
-                OrderedDict([['b', 3], ['c',4], ['d',6]])]
+                OrderedDict([['a', 1.5], ['b', 3], ['c', 4]]),
+                OrderedDict([['b', 3], ['c', 4], ['d', 6]])]
         data = [Series(d) for d in data]

         result = DataFrame(data)
@@ -2291,8 +2294,8 @@

         expected = DataFrame(index=[0])
         assert_frame_equal(result, expected)

-        data = [OrderedDict([['a', 1.5],[ 'b', 3.0],[ 'c',4.0]]),
-                OrderedDict([['a', 1.5], ['b', 3.0],[ 'c',6.0]])]
+        data = [OrderedDict([['a', 1.5], ['b', 3.0], ['c', 4.0]]),
+                OrderedDict([['a', 1.5], ['b', 3.0], ['c', 6.0]])]
         sdict = OrderedDict(zip(range(len(data)), data))

         idx = Index(['a', 'b', 'c'])
@@ -2315,14 +2318,14 @@ class CustomDict(dict):
         assert_frame_equal(result, result_custom)

     def test_constructor_ragged(self):
-        data = {'A' : randn(10),
-                'B' : randn(8)}
+        data = {'A': randn(10),
+                'B': randn(8)}
         self.assertRaises(Exception, DataFrame, data)

     def test_constructor_scalar(self):
         idx = Index(range(3))
-        df = DataFrame({"a" : 0}, index=idx)
-        expected = DataFrame({"a" : [0, 0, 0]}, index=idx)
+        df = DataFrame({"a": 0}, index=idx)
+        expected = DataFrame({"a": [0, 0, 0]}, index=idx)
         assert_frame_equal(df, expected)

     def test_constructor_Series_copy_bug(self):
@@ -2331,7 +2334,7 @@ def test_constructor_Series_copy_bug(self):

     def test_constructor_mixed_dict_and_Series(self):
         data = {}
-        data['A'] = {'foo' : 1, 'bar' : 2, 'baz' : 3}
+        data['A'] = {'foo': 1, 'bar': 2, 'baz': 3}
         data['B'] = Series([4, 3, 2, 1], index=['bar', 'qux', 'baz', 'foo'])

         result = DataFrame(data)
@@ -2339,12 +2342,12 @@ def test_constructor_mixed_dict_and_Series(self):

         # ordering ambiguous, raise exception
         self.assertRaises(Exception, DataFrame,
-                          {'A' : ['a', 'b'], 'B' : {'a' : 'a', 'b' : 'b'}})
+                          {'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}})

         # this is OK though
-        result = DataFrame({'A' : ['a', 'b'],
-                            'B' : Series(['a', 'b'], index=['a', 'b'])})
-        expected = DataFrame({'A' : ['a', 'b'], 'B' : ['a', 'b']},
+        result = DataFrame({'A': ['a', 'b'],
+                            'B': Series(['a', 'b'], index=['a', 'b'])})
+        expected = DataFrame({'A': ['a', 'b'], 'B': ['a', 'b']},
                              index=['a', 'b'])
         assert_frame_equal(result, expected)

@@ -2379,10 +2382,10 @@ def test_constructor_Series_named(self):

     def test_constructor_Series_differently_indexed(self):
         # name
-        s1 = Series([1, 2, 3], index=['a','b','c'], name='x')
+        s1 = Series([1, 2, 3], index=['a', 'b', 'c'], name='x')

         # no name
-        s2 = Series([1, 2, 3], index=['a','b','c'])
+        s2 = Series([1, 2, 3], index=['a', 'b', 'c'])

         other_index = Index(['a', 'b'])

@@ -2430,7 +2433,8 @@ def test_constructor_from_items(self):
                                            orient='index')

         # orient='index', but thar be tuples
-        arr = lib.list_to_object_array([('bar', 'baz')] * len(self.mixed_frame))
+        arr = lib.list_to_object_array(
+            [('bar', 'baz')] * len(self.mixed_frame))
         self.mixed_frame['foo'] = arr
         row_items = [(idx, list(self.mixed_frame.xs(idx)))
                      for idx in self.mixed_frame.index]
@@ -2441,20 +2445,19 @@ def test_constructor_from_items(self):
         self.assert_(isinstance(recons['foo'][0], tuple))

         rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
-                        orient='index', columns=['one', 'two', 'three'])
+                                  orient='index', columns=['one', 'two', 'three'])
         xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'],
                        columns=['one', 'two', 'three'])
         assert_frame_equal(rs, xp)

-
     def test_constructor_mix_series_nonseries(self):
-        df = DataFrame({'A' : self.frame['A'],
-                        'B' : list(self.frame['B'])}, columns=['A', 'B'])
+        df = DataFrame({'A': self.frame['A'],
+                        'B': list(self.frame['B'])}, columns=['A', 'B'])
         assert_frame_equal(df, self.frame.ix[:, ['A', 'B']])

         self.assertRaises(ValueError, DataFrame,
-                          {'A' : self.frame['A'],
-                           'B' : list(self.frame['B'])[:-2]})
+                          {'A': self.frame['A'],
+                           'B': list(self.frame['B'])[:-2]})

     def test_constructor_miscast_na_int_dtype(self):
         df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
@@ -2469,28 +2472,30 @@ def test_constructor_column_duplicates(self):

         assert_frame_equal(df, edf)

-        idf = DataFrame.from_items([('a',[8]),('a',[5])], columns=['a','a'])
+        idf = DataFrame.from_items(
+            [('a', [8]), ('a', [5])], columns=['a', 'a'])
         assert_frame_equal(idf, edf)

         self.assertRaises(ValueError, DataFrame.from_items,
-                          [('a',[8]),('a',[5]), ('b', [6])],
-                          columns=['b', 'a','a'])
+                          [('a', [8]), ('a', [5]), ('b', [6])],
+                          columns=['b', 'a', 'a'])

     def test_constructor_single_value(self):
-        df = DataFrame(0., index=[1,2,3], columns=['a','b','c'])
+        df = DataFrame(0., index=[1, 2, 3], columns=['a', 'b', 'c'])
         assert_frame_equal(df, DataFrame(np.zeros(df.shape),
                                          df.index, df.columns))

-        df = DataFrame('a', index=[1,2], columns=['a', 'c'])
+        df = DataFrame('a', index=[1, 2], columns=['a', 'c'])
         assert_frame_equal(df, DataFrame(np.array([['a', 'a'],
                                                    ['a', 'a']],
                                                   dtype=object),
-                                         index=[1,2],
+                                         index=[1, 2],
                                          columns=['a', 'c']))

-        self.assertRaises(com.PandasError, DataFrame, 'a', [1,2])
-        self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a' ,'c'])
-        self.assertRaises(com.PandasError, DataFrame, 'a', [1,2], ['a', 'c'], float)
+        self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2])
+        self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c'])
+        self.assertRaises(
+            com.PandasError, DataFrame, 'a', [1, 2], ['a', 'c'], float)

     def test_new_empty_index(self):
         df1 = DataFrame(randn(0, 3))
@@ -2537,8 +2542,8 @@ def test_pickle(self):

     def test_to_dict(self):
         test_data = {
-            'A' : {'1' : 1, '2' : 2},
-            'B' : {'1' : '1', '2' : '2', '3' : '3'},
+            'A': {'1': 1, '2': 2},
+            'B': {'1': '1', '2': '2', '3': '3'},
         }
         recons_data = DataFrame(test_data).to_dict()

@@ -2548,13 +2553,13 @@ def test_to_dict(self):

         recons_data = DataFrame(test_data).to_dict("l")

-        for k,v in test_data.iteritems():
+        for k, v in test_data.iteritems():
             for k2, v2 in v.iteritems():
                 self.assertEqual(v2, recons_data[k][int(k2) - 1])

         recons_data = DataFrame(test_data).to_dict("s")

-        for k,v in test_data.iteritems():
+        for k, v in test_data.iteritems():
             for k2, v2 in v.iteritems():
                 self.assertEqual(v2, recons_data[k][k2])

@@ -2709,8 +2714,8 @@ def test_to_records_dt64(self):

     def test_from_records_to_records(self):
         # from numpy documentation
-        arr = np.zeros((2,),dtype=('i4,f4,a10'))
-        arr[:] = [(1,2.,'Hello'),(2,3.,"World")]
+        arr = np.zeros((2,), dtype=('i4,f4,a10'))
+        arr[:] = [(1, 2., 'Hello'), (2, 3., "World")]

         frame = DataFrame.from_records(arr)
@@ -2745,8 +2750,8 @@ def test_from_records_iterator(self):
         arr = np.array([(1.0, 2), (3.0, 4), (5., 6), (7., 8)],
                        dtype=[('x', float), ('y', int)])
         df = DataFrame.from_records(iter(arr), nrows=2)
-        xp = DataFrame({'x' : np.array([1.0, 3.0], dtype=float),
-                        'y' : np.array([2, 4], dtype=int)})
+        xp = DataFrame({'x': np.array([1.0, 3.0], dtype=float),
+                        'y': np.array([2, 4], dtype=int)})
         assert_frame_equal(df, xp)

         arr = [(1.0, 2), (3.0, 4), (5., 6), (7., 8)]
@@ -2777,10 +2782,10 @@ def test_from_records_decimal(self):
         self.assert_(np.isnan(df['a'].values[-1]))

     def test_from_records_duplicates(self):
-        result = DataFrame.from_records([(1,2,3), (4,5,6)],
-                                        columns=['a','b','a'])
+        result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)],
+                                        columns=['a', 'b', 'a'])

-        expected = DataFrame([(1,2,3), (4,5,6)],
+        expected = DataFrame([(1, 2, 3), (4, 5, 6)],
                              columns=['a', 'b', 'a'])

         assert_frame_equal(result, expected)
@@ -2819,7 +2824,7 @@ def test_from_records_misc_brokenness(self):
         assert_frame_equal(result, exp)

     def test_to_records_floats(self):
-        df = DataFrame(np.random.rand(10,10))
+        df = DataFrame(np.random.rand(10, 10))
         df.to_records()

     def test_to_recods_index_name(self):
@@ -2838,25 +2843,25 @@ def test_to_recods_index_name(self):
         self.assert_('level_0' in rs.dtype.fields)

     def test_join_str_datetime(self):
-        str_dates = ['20120209' , '20120222']
-        dt_dates = [datetime(2012,2,9), datetime(2012,2,22)]
+        str_dates = ['20120209', '20120222']
+        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

         A = DataFrame(str_dates, index=range(2), columns=['aa'])
-        C = DataFrame([[1,2],[3,4]], index=str_dates, columns=dt_dates)
+        C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates)

-        tst = A.join(C, on = 'aa')
+        tst = A.join(C, on='aa')

         self.assert_(len(tst.columns) == 3)

     def test_from_records_sequencelike(self):
-        df = DataFrame({'A' : np.random.randn(6),
-                        'B' : np.arange(6),
-                        'C' : ['foo'] * 6,
-                        'D' : np.array([True, False] * 3, dtype=bool)})
+        df = DataFrame({'A': np.random.randn(6),
+                        'B': np.arange(6),
+                        'C': ['foo'] * 6,
+                        'D': np.array([True, False] * 3, dtype=bool)})

         tuples = [tuple(x) for x in df.values]
         lists = [list(x) for x in tuples]
-        asdict = dict((x,y) for x, y in df.iteritems())
+        asdict = dict((x, y) for x, y in df.iteritems())

         result = DataFrame.from_records(tuples, columns=df.columns)
         result2 = DataFrame.from_records(lists, columns=df.columns)
@@ -2870,7 +2875,7 @@ def test_from_records_sequencelike(self):
         self.assert_(np.array_equal(result.columns, range(4)))

         # test exclude parameter
-        result = DataFrame.from_records(tuples, exclude=[0,1,3])
+        result = DataFrame.from_records(tuples, exclude=[0, 1, 3])
         result.columns = ['C']
         assert_frame_equal(result, df[['C']])

@@ -2884,14 +2889,14 @@ def test_from_records_sequencelike(self):
         self.assertEqual(len(result.columns), 0)

     def test_from_records_with_index_data(self):
-        df = DataFrame(np.random.randn(10,3), columns=['A', 'B', 'C'])
+        df = DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])

         data = np.random.randn(10)
         df1 = DataFrame.from_records(df, index=data)
         assert(df1.index.equals(Index(data)))

     def test_from_records_bad_index_column(self):
-        df = DataFrame(np.random.randn(10,3), columns=['A', 'B', 'C'])
+        df = DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])

         # should pass
         df1 = DataFrame.from_records(df, index=['C'])
@@ -2939,9 +2944,9 @@ def test_nonzero(self):
         self.assertFalse(self.mixed_frame.empty)

         # corner case
-        df = DataFrame({'A' : [1., 2., 3.],
-                        'B' : ['a', 'b', 'c']},
-                        index=np.arange(3))
+        df = DataFrame({'A': [1., 2., 3.],
+                        'B': ['a', 'b', 'c']},
+                       index=np.arange(3))
         del df['A']
         self.assertFalse(df.empty)

@@ -2965,9 +2970,9 @@ def test_repr_mixed(self):
     @slow
     def test_repr_mixed_big(self):
         # big mixed
-        biggie = DataFrame({'A' : randn(200),
-                            'B' : tm.makeStringIndex(200)},
-                            index=range(200))
+        biggie = DataFrame({'A': randn(200),
+                            'B': tm.makeStringIndex(200)},
+                           index=range(200))
         biggie['A'][:20] = nan
         biggie['B'][:20] = nan

@@ -2993,7 +2998,7 @@ def test_repr(self):
         # no columns or index
         self.empty.info(buf=buf)

-        df = DataFrame(["a\n\r\tb"],columns=["a\n\r\td"],index=["a\n\r\tf"])
+        df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"])
         self.assertFalse("\t" in repr(df))
         self.assertFalse("\r" in repr(df))
         self.assertFalse("a\n" in repr(df))
@@ -3015,10 +3020,11 @@ def test_repr_unsortable(self):
                                 category=FutureWarning,
                                 module=".*format")

-        unsortable = DataFrame({'foo' : [1] * 50,
-                                datetime.today() : [1] * 50,
-                                'bar' : ['bar'] * 50,
-                                datetime.today() + timedelta(1) : ['bar'] * 50},
+        unsortable = DataFrame({'foo': [1] * 50,
+                                datetime.today(): [1] * 50,
+                                'bar': ['bar'] * 50,
+                                datetime.today(
+                                ) + timedelta(1): ['bar'] * 50},
                                index=np.arange(50))
         foo = repr(unsortable)
@@ -3120,16 +3126,16 @@ def test_pop(self):
         self.assert_('foo' not in self.frame)

     def test_pop_non_unique_cols(self):
-        df=DataFrame({0:[0,1],1:[0,1],2:[4,5]})
-        df.columns=["a","b","a"]
+        df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]})
+        df.columns = ["a", "b", "a"]

-        res=df.pop("a")
-        self.assertEqual(type(res),DataFrame)
-        self.assertEqual(len(res),2)
-        self.assertEqual(len(df.columns),1)
+        res = df.pop("a")
+        self.assertEqual(type(res), DataFrame)
+        self.assertEqual(len(res), 2)
+        self.assertEqual(len(df.columns), 1)
         self.assertTrue("b" in df.columns)
         self.assertFalse("a" in df.columns)
-        self.assertEqual(len(df.index),2)
+        self.assertEqual(len(df.index), 2)

     def test_iter(self):
         self.assert_(tm.equalContents(list(self.frame), self.frame.columns))
@@ -3147,7 +3153,7 @@ def test_itertuples(self):
         for i, tup in enumerate(self.frame.itertuples()):
             s = Series(tup[1:])
             s.name = tup[0]
-            expected = self.frame.ix[i,:].reset_index(drop=True)
+            expected = self.frame.ix[i, :].reset_index(drop=True)
             assert_series_equal(s, expected)

         df = DataFrame({'floats': np.random.randn(5),
@@ -3186,12 +3192,12 @@ def test_operators(self):
             expected = self.frame2 * 2
             assert_frame_equal(added, expected)

-        df = DataFrame({'a' : ['a', None, 'b']})
-        assert_frame_equal(df + df, DataFrame({'a' : ['aa', np.nan, 'bb']}))
+        df = DataFrame({'a': ['a', None, 'b']})
+        assert_frame_equal(df + df, DataFrame({'a': ['aa', np.nan, 'bb']}))

     def test_operators_none_as_na(self):
-        df = DataFrame({"col1": [2,5.0,123,None],
-                        "col2": [1,2,3,4]}, dtype=object)
+        df = DataFrame({"col1": [2, 5.0, 123, None],
+                        "col2": [1, 2, 3, 4]}, dtype=object)

         ops = [operator.add, operator.sub, operator.mul, operator.truediv]

@@ -3284,7 +3290,7 @@ def test_neg(self):
         assert_frame_equal(-self.frame, -1 * self.frame)

     def test_invert(self):
-        assert_frame_equal(-(self.frame < 0), ~(self.frame <0))
+        assert_frame_equal(-(self.frame < 0), ~(self.frame < 0))

     def test_first_last_valid(self):
         N = len(self.frame.index)
@@ -3292,7 +3298,7 @@ def test_first_last_valid(self):
         mat[:5] = nan
         mat[-5:] = nan

-        frame = DataFrame({'foo' : mat}, index=self.frame.index)
+        frame = DataFrame({'foo': mat}, index=self.frame.index)
         index = frame.first_valid_index()

         self.assert_(index == frame.index[5])
@@ -3341,9 +3347,8 @@ def test_arith_mixed(self):
                               'B': [2, 4, 6]})
         assert_frame_equal(result, expected)

-
     def test_arith_getitem_commute(self):
-        df = DataFrame({'A' : [1.1,3.3],'B' : [2.5,-3.9]})
+        df = DataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]})

         self._test_op(df, operator.add)
         self._test_op(df, operator.sub)
@@ -3490,23 +3495,23 @@ def _test_seq(df, idx_ser, col_ser):
         # complex
         arr = np.array([np.nan, 1, 6, np.nan])
         arr2 = np.array([2j, np.nan, 7, None])
-        df = DataFrame({'a' : arr})
-        df2 = DataFrame({'a' : arr2})
+        df = DataFrame({'a': arr})
+        df2 = DataFrame({'a': arr2})
         rs = df.gt(df2)
         self.assert_(not rs.values.any())
         rs = df.ne(df2)
         self.assert_(rs.values.all())

         arr3 = np.array([2j, np.nan, None])
-        df3 = DataFrame({'a' : arr3})
+        df3 = DataFrame({'a': arr3})
         rs = df3.gt(2j)
         self.assert_(not rs.values.any())

         # corner, dtype=object
-        df1 = DataFrame({'col' : ['foo', np.nan, 'bar']})
-        df2 = DataFrame({'col' : ['foo', datetime.now(), 'bar']})
+        df1 = DataFrame({'col': ['foo', np.nan, 'bar']})
+        df2 = DataFrame({'col': ['foo', datetime.now(), 'bar']})
         result = df1.ne(df2)
-        exp = DataFrame({'col' : [False, True, False]})
+        exp = DataFrame({'col': [False, True, False]})
         assert_frame_equal(result, exp)

     def test_arith_flex_series(self):
@@ -3537,7 +3542,6 @@ def test_arith_non_pandas_object(self):
                           index=df.index, columns=df.columns)
         assert_frame_equal(df.add(val1, axis=0), added)

-
         val2 = list(df['two'])
         added = DataFrame(df.values + val2, index=df.index, columns=df.columns)
@@ -3559,8 +3563,8 @@ def test_combineFrame(self):

         added = self.frame + frame_copy
         tm.assert_dict_equal(added['A'].valid(),
-                            self.frame['A'] * 2,
-                            compare_keys=False)
+                             self.frame['A'] * 2,
+                             compare_keys=False)

         self.assert_(np.isnan(added['C'].reindex(frame_copy.index)[:5]).all())

@@ -3685,14 +3689,14 @@ def test_comp(func):
         test_comp(operator.le)

     def test_string_comparison(self):
-        df = DataFrame([{ "a" : 1, "b" : "foo" }, {"a" : 2, "b" : "bar"}])
+        df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
         mask_a = df.a > 1
-        assert_frame_equal(df[mask_a], df.ix[1:1,:])
-        assert_frame_equal(df[-mask_a], df.ix[0:0,:])
+        assert_frame_equal(df[mask_a], df.ix[1:1, :])
+        assert_frame_equal(df[-mask_a], df.ix[0:0, :])

         mask_b = df.b == "foo"
== "foo" - assert_frame_equal(df[mask_b], df.ix[0:0,:]) - assert_frame_equal(df[-mask_b], df.ix[1:1,:]) + assert_frame_equal(df[mask_b], df.ix[0:0, :]) + assert_frame_equal(df[-mask_b], df.ix[1:1, :]) def test_float_none_comparison(self): df = DataFrame(np.random.randn(8, 3), index=range(8), @@ -3727,15 +3731,13 @@ def test_to_csv_from_csv(self): assert_almost_equal(self.tsframe.values, recons.values) # corner case - dm = DataFrame({'s1' : Series(range(3),range(3)), - 's2' : Series(range(2),range(2))}) + dm = DataFrame({'s1': Series(range(3), range(3)), + 's2': Series(range(2), range(2))}) dm.to_csv(path) recons = DataFrame.from_csv(path) assert_frame_equal(dm, recons) - - - #duplicate index + # duplicate index df = DataFrame(np.random.randn(3, 3), index=['a', 'a', 'b'], columns=['x', 'y', 'z']) df.to_csv(path) @@ -3805,7 +3807,7 @@ def test_to_csv_multiindex(self): frame = self.frame old_index = frame.index - arrays = np.arange(len(old_index)*2).reshape(2,-1) + arrays = np.arange(len(old_index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index frame.to_csv(path, header=False) @@ -3813,11 +3815,11 @@ def test_to_csv_multiindex(self): # round trip frame.to_csv(path) - df = DataFrame.from_csv(path, index_col=[0,1], parse_dates=False) + df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False) assert_frame_equal(frame, df) self.assertEqual(frame.index.names, df.index.names) - self.frame.index = old_index # needed if setUP becomes a classmethod + self.frame.index = old_index # needed if setUP becomes a classmethod # try multiindex with dates tsframe = self.tsframe @@ -3825,8 +3827,8 @@ def test_to_csv_multiindex(self): new_index = [old_index, np.arange(len(old_index))] tsframe.index = MultiIndex.from_arrays(new_index) - tsframe.to_csv(path, index_label = ['time','foo']) - recons = DataFrame.from_csv(path, index_col=[0,1]) + tsframe.to_csv(path, index_label=['time', 'foo']) + recons = DataFrame.from_csv(path, index_col=[0, 1]) assert_frame_equal(tsframe, recons) # do not load index @@ -3838,7 +3840,7 @@ def test_to_csv_multiindex(self): tsframe.to_csv(path, index=False) recons = DataFrame.from_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) - self.tsframe.index = old_index # needed if setUP becomes classmethod + self.tsframe.index = old_index # needed if setUP becomes classmethod os.remove(path) @@ -3869,7 +3871,7 @@ def test_to_csv_withcommas(self): path = '__tmp_to_csv_withcommas__' # Commas inside fields should be correctly escaped when saving as CSV. 
-        df = DataFrame({'A':[1,2,3], 'B':['5,6','7,8','9,0']})
+        df = DataFrame({'A': [1, 2, 3], 'B': ['5,6', '7,8', '9,0']})
         df.to_csv(path)
         df2 = DataFrame.from_csv(path)
         assert_frame_equal(df2, df)
@@ -3879,7 +3881,7 @@ def test_to_csv_withcommas(self):

     def test_to_csv_bug(self):
         path = '__tmp_to_csv_bug__.csv'
         f1 = StringIO('a,1.0\nb,2.0')
-        df = DataFrame.from_csv(f1,header=None)
+        df = DataFrame.from_csv(f1, header=None)
         newdf = DataFrame({'t': df[df.columns[0]]})
         newdf.to_csv(path)

@@ -3890,7 +3892,7 @@ def test_to_csv_bug(self):

     def test_to_csv_unicode(self):
         path = '__tmp_to_csv_unicode__.csv'
-        df = DataFrame({u'c/\u03c3':[1,2,3]})
+        df = DataFrame({u'c/\u03c3': [1, 2, 3]})
         df.to_csv(path, encoding='UTF-8')
         df2 = pan.read_csv(path, index_col=0, encoding='UTF-8')
         assert_frame_equal(df, df2)
@@ -3902,10 +3904,12 @@ def test_to_csv_unicode(self):
         os.remove(path)

     def test_to_csv_unicode_index_col(self):
-        buf=StringIO('')
-        df=DataFrame([[u"\u05d0","d2","d3","d4"],["a1","a2","a3","a4"]],
-                     columns=[u"\u05d0",u"\u05d1",u"\u05d2",u"\u05d3"],
-                     index=[u"\u05d0",u"\u05d1"])
+        buf = StringIO('')
+        df = DataFrame(
+            [[u"\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]],
+            columns=[u"\u05d0",
+                     u"\u05d1", u"\u05d2", u"\u05d3"],
+            index=[u"\u05d0", u"\u05d1"])

         df.to_csv(buf, encoding='UTF-8')
         buf.seek(0)
@@ -3957,7 +3961,7 @@ def test_to_csv_unicodewriter_quoting(self):

         buf = StringIO()
         df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC,
-                  encoding = 'utf-8')
+                  encoding='utf-8')

         result = buf.getvalue()
         expected = ('"A","B"\n'
@@ -3992,15 +3996,13 @@ def test_to_csv_line_terminators(self):
         self.assertEqual(buf.getvalue(), expected)

         buf = StringIO()
-        df.to_csv(buf) # The default line terminator remains \n
+        df.to_csv(buf)  # The default line terminator remains \n
         expected = (',A,B\n'
                     'one,1,4\n'
                     'two,2,5\n'
                     'three,3,6\n')
         self.assertEqual(buf.getvalue(), expected)

-
-
     def test_info(self):
         io = StringIO()
         self.frame.info(buf=io)
@@ -4034,7 +4036,6 @@ def test_info_wide(self):
         self.assert_(rs == xp)
         reset_option('display.max_info_columns')

-
     def test_info_duplicate_columns(self):
         io = StringIO()

@@ -4058,7 +4059,8 @@ def test_convert_objects(self):
         self.assert_(converted['A'].dtype == np.float64)

     def test_convert_objects_no_conversion(self):
-        mixed1 = DataFrame({'a': [1,2,3], 'b': [4.0, 5, 6], 'c': ['x','y','z']})
+        mixed1 = DataFrame(
+            {'a': [1, 2, 3], 'b': [4.0, 5, 6], 'c': ['x', 'y', 'z']})
         mixed2 = mixed1.convert_objects()
         assert_frame_equal(mixed1, mixed2)
@@ -4072,7 +4074,7 @@ def test_append_series_dict(self):
         self.assertRaises(Exception, df.append, series, verify_integrity=True)

         result = df.append(series[::-1], ignore_index=True)
-        expected = df.append(DataFrame({0 : series[::-1]}, index=df.columns).T,
+        expected = df.append(DataFrame({0: series[::-1]}, index=df.columns).T,
                              ignore_index=True)
         assert_frame_equal(result, expected)

@@ -4081,7 +4083,7 @@ def test_append_series_dict(self):
         assert_frame_equal(result, expected)

         result = df.append(series[::-1][:3], ignore_index=True)
-        expected = df.append(DataFrame({0 : series[::-1][:3]}).T,
+        expected = df.append(DataFrame({0: series[::-1][:3]}).T,
                              ignore_index=True)
         assert_frame_equal(result, expected.ix[:, result.columns])

@@ -4128,9 +4130,9 @@ def test_asfreq(self):

     def test_asfreq_datetimeindex(self):
         from pandas import DatetimeIndex
-        df = DataFrame({'A': [1,2,3]},
-                       index=[datetime(2011,11,01), datetime(2011,11,2),
-                              datetime(2011,11,3)])
+        df = DataFrame({'A': [1, 2, 3]},
+                       index=[datetime(2011, 11, 01), datetime(2011, 11, 2),
+                              datetime(2011, 11, 3)])
         df = df.asfreq('B')
         self.assert_(isinstance(df.index, DatetimeIndex))
@@ -4154,7 +4156,7 @@ def test_as_matrix(self):
         mat = self.mixed_frame.as_matrix(['foo', 'A'])
         self.assertEqual(mat[0, 0], 'bar')

-        df = DataFrame({'real' : [1,2,3], 'complex' : [1j, 2j, 3j]})
+        df = DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]})
         mat = df.as_matrix()
         self.assertEqual(mat[0, 0], 1j)
@@ -4271,7 +4273,7 @@ def test_corr_constant(self):

     def test_corr_int(self):
         # dtypes other than float64 #1761
-        df3 = DataFrame({"a":[1,2,3,4], "b":[1,2,3,4]})
+        df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})

         # it works!
         df3.cov()
@@ -4309,7 +4311,6 @@ def test_cov(self):
         expected = self.mixed_frame.ix[:, ['A', 'B', 'C', 'D']].cov()
         assert_frame_equal(result, expected)

-
     def test_corrwith(self):
         a = self.tsframe
         noise = Series(randn(len(a)), index=a.index)
@@ -4369,7 +4370,7 @@ def test_dropEmptyRows(self):
         mat = randn(N)
         mat[:5] = nan

-        frame = DataFrame({'foo' : mat}, index=self.frame.index)
+        frame = DataFrame({'foo': mat}, index=self.frame.index)

         smaller_frame = frame.dropna(how='all')
         self.assert_(np.array_equal(smaller_frame['foo'], mat[5:]))
@@ -4382,7 +4383,7 @@ def test_dropIncompleteRows(self):
         mat = randn(N)
         mat[:5] = nan

-        frame = DataFrame({'foo' : mat}, index=self.frame.index)
+        frame = DataFrame({'foo': mat}, index=self.frame.index)
         frame['bar'] = 5

         smaller_frame = frame.dropna()
@@ -4450,12 +4451,12 @@ def test_dropna_multiple_axes(self):
         assert_frame_equal(result2, expected)

     def test_drop_duplicates(self):
-        df = DataFrame({'AAA' : ['foo', 'bar', 'foo', 'bar',
-                                 'foo', 'bar', 'bar', 'foo'],
-                        'B' : ['one', 'one', 'two', 'two',
-                               'two', 'two', 'one', 'two'],
-                        'C' : [1, 1, 2, 2, 2, 2, 1, 2],
-                        'D' : range(8)})
+        df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
+                                'foo', 'bar', 'bar', 'foo'],
+                        'B': ['one', 'one', 'two', 'two',
+                              'two', 'two', 'one', 'two'],
+                        'C': [1, 1, 2, 2, 2, 2, 1, 2],
+                        'D': range(8)})

         # single column
         result = df.drop_duplicates('AAA')
@@ -4490,12 +4491,12 @@ def test_drop_duplicates(self):
         assert_frame_equal(result, expected)

     def test_drop_duplicates_tuple(self):
-        df = DataFrame({('AA', 'AB') : ['foo', 'bar', 'foo', 'bar',
-                                        'foo', 'bar', 'bar', 'foo'],
-                        'B' : ['one', 'one', 'two', 'two',
-                               'two', 'two', 'one', 'two'],
-                        'C' : [1, 1, 2, 2, 2, 2, 1, 2],
-                        'D' : range(8)})
+        df = DataFrame({('AA', 'AB'): ['foo', 'bar', 'foo', 'bar',
+                                       'foo', 'bar', 'bar', 'foo'],
+                        'B': ['one', 'one', 'two', 'two',
+                              'two', 'two', 'one', 'two'],
+                        'C': [1, 1, 2, 2, 2, 2, 1, 2],
+                        'D': range(8)})

         # single column
         result = df.drop_duplicates(('AA', 'AB'))
@@ -4513,12 +4514,12 @@ def test_drop_duplicates_tuple(self):

     def test_drop_duplicates_NA(self):
         # none
-        df = DataFrame({'A' : [None, None, 'foo', 'bar',
-                               'foo', 'bar', 'bar', 'foo'],
-                        'B' : ['one', 'one', 'two', 'two',
-                               'two', 'two', 'one', 'two'],
-                        'C' : [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.],
-                        'D' : range(8)})
+        df = DataFrame({'A': [None, None, 'foo', 'bar',
+                              'foo', 'bar', 'bar', 'foo'],
+                        'B': ['one', 'one', 'two', 'two',
+                              'two', 'two', 'one', 'two'],
+                        'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.],
+                        'D': range(8)})

         # single column
         result = df.drop_duplicates('A')
@@ -4539,12 +4540,12 @@ def test_drop_duplicates_NA(self):
         assert_frame_equal(result, expected)

         # nan
-        df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-                               'foo', 'bar', 'bar', 'foo'],
-                        'B' : ['one', 'one', 'two', 'two',
-                               'two', 'two', 'one', 'two'],
-                        'C' : [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.],
-                        'D' : range(8)})
+        df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
= DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], + 'D': range(8)}) # single column result = df.drop_duplicates('C') @@ -4565,12 +4566,12 @@ def test_drop_duplicates_NA(self): assert_frame_equal(result, expected) def test_drop_duplicates_inplace(self): - orig = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'bar', 'foo'], - 'B' : ['one', 'one', 'two', 'two', - 'two', 'two', 'one', 'two'], - 'C' : [1, 1, 2, 2, 2, 2, 1, 2], - 'D' : range(8)}) + orig = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': [1, 1, 2, 2, 2, 2, 1, 2], + 'D': range(8)}) # single column df = orig.copy() @@ -4618,16 +4619,16 @@ def test_drop_duplicates_inplace(self): assert_frame_equal(result, expected) def test_drop_col_still_multiindex(self): - arrays = [[ 'a', 'b', 'c', 'top'], - [ '', '', '', 'OD' ], - [ '', '', '', 'wx' ]] + arrays = [['a', 'b', 'c', 'top'], + ['', '', '', 'OD'], + ['', '', '', 'wx']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) - df = DataFrame(randn(3,4), columns=index) - del df[('a','','')] + df = DataFrame(randn(3, 4), columns=index) + del df[('a', '', '')] assert(isinstance(df.columns, MultiIndex)) def test_fillna(self): @@ -4706,7 +4707,7 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # it works - result = df.fillna({'a': 0, 'b': 5, 'd' : 7}) + result = df.fillna({'a': 0, 'b': 5, 'd': 7}) # Series treated same as dict result = df.fillna(df.max()) @@ -4791,12 +4792,13 @@ def test_replace_interpolate(self): padded = self.tsframe.replace(nan, method='pad') assert_frame_equal(padded, self.tsframe.fillna(method='pad')) - result = self.tsframe.replace(to_replace={'A' : nan}, method='pad', + result = self.tsframe.replace(to_replace={'A': nan}, method='pad', axis=1) - expected = self.tsframe.T.replace(to_replace={'A' : nan}, method='pad').T + expected = self.tsframe.T.replace( + to_replace={'A': nan}, method='pad').T assert_frame_equal(result, expected) - result = self.tsframe.replace(to_replace={'A' : nan, 'B' : -1e8}, + result = self.tsframe.replace(to_replace={'A': nan, 'B': -1e8}, method='bfill') tsframe = self.tsframe.copy() b = tsframe['B'] @@ -4831,9 +4833,9 @@ def test_replace_interpolate(self): def test_replace_dtypes(self): # int - df = DataFrame({'ints' : [1,2,3]}) + df = DataFrame({'ints': [1, 2, 3]}) result = df.replace(1, 0) - expected = DataFrame({'ints' : [0,2,3]}) + expected = DataFrame({'ints': [0, 2, 3]}) assert_frame_equal(result, expected) # bools @@ -4841,7 +4843,7 @@ def test_replace_dtypes(self): result = df.replace(False, True) self.assert_(result.values.all()) - #complex blocks + # complex blocks df = DataFrame({'complex': [1j, 2j, 3j]}) result = df.replace(1j, 0j) expected = DataFrame({'complex': [0j, 2j, 3j]}) @@ -4850,17 +4852,17 @@ def test_replace_dtypes(self): # datetime blocks prev = datetime.today() now = datetime.today() - df = DataFrame({'datetime64' : Index([prev, now, prev])}) + df = DataFrame({'datetime64': Index([prev, now, prev])}) result = df.replace(prev, now) - expected = DataFrame({'datetime64' : Index([now] * 3)}) + expected = DataFrame({'datetime64': Index([now] * 3)}) assert_frame_equal(result, expected) def test_replace_input_formats(self): # both dicts - to_rep = {'A' : np.nan, 'B' : 0, 'C' : ''} - values = {'A' : 0, 'B' : -1, 'C' : 
'missing'} - df = DataFrame({'A' : [np.nan, 0, np.inf], 'B' : [0, 2, 5], - 'C' : ['', 'asdf', 'fd']}) + to_rep = {'A': np.nan, 'B': 0, 'C': ''} + values = {'A': 0, 'B': -1, 'C': 'missing'} + df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], + 'C': ['', 'asdf', 'fd']}) filled = df.replace(to_rep, values) expected = {} for k, v in df.iteritems(): @@ -4868,8 +4870,8 @@ def test_replace_input_formats(self): assert_frame_equal(filled, DataFrame(expected)) result = df.replace([0, 2, 5], [5, 2, 0]) - expected = DataFrame({'A' : [np.nan, 5, np.inf], 'B' : [5, 2, 0], - 'C' : ['', 'asdf', 'fd']}) + expected = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0], + 'C': ['', 'asdf', 'fd']}) assert_frame_equal(result, expected) # dict to scalar @@ -4882,9 +4884,9 @@ def test_replace_input_formats(self): self.assertRaises(ValueError, df.replace, to_rep, [np.nan, 0, '']) # scalar to dict - values = {'A' : 0, 'B' : -1, 'C' : 'missing'} - df = DataFrame({'A' : [np.nan, 0, np.nan], 'B' : [0, 2, 5], - 'C' : ['', 'asdf', 'fd']}) + values = {'A': 0, 'B': -1, 'C': 'missing'} + df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], + 'C': ['', 'asdf', 'fd']}) filled = df.replace(np.nan, values) expected = {} for k, v in df.iteritems(): @@ -5003,8 +5005,8 @@ def test_xs(self): # mixed-type xs test_data = { - 'A' : {'1' : 1, '2' : 2}, - 'B' : {'1' : '1', '2' : '2', '3' : '3'}, + 'A': {'1': 1, '2': 2}, + 'B': {'1': '1', '2': '2', '3': '3'}, } frame = DataFrame(test_data) xs = frame.xs('1') @@ -5056,17 +5058,18 @@ def test_xs_duplicates(self): def test_pivot(self): data = { - 'index' : ['A', 'B', 'C', 'C', 'B', 'A'], - 'columns' : ['One', 'One', 'One', 'Two', 'Two', 'Two'], - 'values' : [1., 2., 3., 3., 2., 1.] + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.] 
} frame = DataFrame(data) - pivoted = frame.pivot(index='index', columns='columns', values='values') + pivoted = frame.pivot( + index='index', columns='columns', values='values') expected = DataFrame({ - 'One' : {'A' : 1., 'B' : 2., 'C' : 3.}, - 'Two' : {'A' : 1., 'B' : 2., 'C' : 3.} + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.} }) assert_frame_equal(pivoted, expected) @@ -5087,9 +5090,9 @@ def test_pivot(self): assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) def test_pivot_duplicates(self): - data = DataFrame({'a' : ['bar', 'bar', 'foo', 'foo', 'foo'], - 'b' : ['one', 'two', 'one', 'one', 'two'], - 'c' : [1., 2., 3., 3., 4.]}) + data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], + 'b': ['one', 'two', 'one', 'one', 'two'], + 'c': [1., 2., 3., 3., 4.]}) self.assertRaises(Exception, data.pivot, 'a', 'b', 'c') def test_pivot_empty(self): @@ -5138,7 +5141,7 @@ def test_reindex(self): for col, series in nonContigFrame.iteritems(): self.assert_(tm.equalContents(series.index, - nonContigFrame.index)) + nonContigFrame.index)) # corner cases @@ -5258,7 +5261,7 @@ def test_align(self): other = self.frame.ix[:-5, :3] af, bf = self.frame.align(other, axis=0, fill_value=-1) self.assert_(bf.columns.equals(other.columns)) - #test fill value + # test fill value join_idx = self.frame.index.join(other.index) diff_a = self.frame.index.diff(join_idx) diff_b = other.index.diff(join_idx) @@ -5277,7 +5280,7 @@ def test_align(self): self.assert_(bf.columns.equals(self.frame.columns)) self.assert_(bf.index.equals(other.index)) - #test fill value + # test fill value join_idx = self.frame.index.join(other.index) diff_a = self.frame.index.diff(join_idx) diff_b = other.index.diff(join_idx) @@ -5299,16 +5302,16 @@ def test_align(self): join='inner', axis=1, method='pad') self.assert_(bf.columns.equals(self.mixed_frame.columns)) - af, bf = self.frame.align(other.ix[:,0], join='inner', axis=1, + af, bf = self.frame.align(other.ix[:, 0], join='inner', axis=1, method=None, fill_value=None) self.assert_(bf.index.equals(Index([]))) - af, bf = self.frame.align(other.ix[:,0], join='inner', axis=1, + af, bf = self.frame.align(other.ix[:, 0], join='inner', axis=1, method=None, fill_value=0) self.assert_(bf.index.equals(Index([]))) # try to align dataframe to series along bad axis - self.assertRaises(ValueError, self.frame.align, af.ix[0,:3], + self.assertRaises(ValueError, self.frame.align, af.ix[0, :3], join='inner', axis=2) def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): @@ -5324,7 +5327,7 @@ def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): eb = eb.reindex(index=join_index) if axis is None or axis == 1: - join_columns = a.columns.join(b.columns, how=how) + join_columns = a.columns.join(b.columns, how=how) ea = ea.reindex(columns=join_columns) eb = eb.reindex(columns=join_columns) @@ -5386,11 +5389,10 @@ def _check_align_fill(self, kind, meth, ax, fax): self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1) - def test_align_int_fill_bug(self): # GH #910 - X = np.random.rand(10,10) - Y = np.ones((10,1),dtype=int) + X = np.random.rand(10, 10) + Y = np.ones((10, 1), dtype=int) df1 = DataFrame(X) df1['0.X'] = Y.squeeze() @@ -5450,10 +5452,8 @@ def test_mask(self): assert_frame_equal(rs, df.mask(df <= 0)) assert_frame_equal(rs, df.mask(~cond)) - #---------------------------------------------------------------------- # Transposing - def test_transpose(self): frame = self.frame dft = frame.T @@ -5483,10 
+5483,10 @@ def test_transpose_get_view(self): def test_rename(self): mapping = { - 'A' : 'a', - 'B' : 'b', - 'C' : 'c', - 'D' : 'd' + 'A': 'a', + 'B': 'b', + 'C': 'c', + 'D': 'd' } renamed = self.frame.rename(columns=mapping) @@ -5499,12 +5499,12 @@ def test_rename(self): # index data = { - 'A' : {'foo' : 0, 'bar' : 1} + 'A': {'foo': 0, 'bar': 1} } # gets sorted alphabetical df = DataFrame(data) - renamed = df.rename(index={'foo' : 'bar', 'bar' : 'foo'}) + renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'}) self.assert_(np.array_equal(renamed.index, ['foo', 'bar'])) renamed = df.rename(index=str.upper) @@ -5514,26 +5514,26 @@ def test_rename(self): self.assertRaises(Exception, self.frame.rename) # partial columns - renamed = self.frame.rename(columns={'C' : 'foo', 'D' : 'bar'}) + renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) self.assert_(np.array_equal(renamed.columns, ['A', 'B', 'foo', 'bar'])) # other axis - renamed = self.frame.T.rename(index={'C' : 'foo', 'D' : 'bar'}) + renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'}) self.assert_(np.array_equal(renamed.index, ['A', 'B', 'foo', 'bar'])) def test_rename_nocopy(self): - renamed = self.frame.rename(columns={'C' : 'foo'}, copy=False) + renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. self.assert_((self.frame['C'] == 1.).all()) def test_rename_inplace(self): - self.frame.rename(columns={'C' : 'foo'}) + self.frame.rename(columns={'C': 'foo'}) self.assert_('C' in self.frame) self.assert_('foo' not in self.frame) c_id = id(self.frame['C']) frame = self.frame.copy() - res = frame.rename(columns={'C' : 'foo'}, inplace=True) + res = frame.rename(columns={'C': 'foo'}, inplace=True) self.assertTrue(res is None) @@ -5541,10 +5541,8 @@ def test_rename_inplace(self): self.assert_('foo' in frame) self.assert_(id(frame['foo']) != c_id) - #---------------------------------------------------------------------- # Time series related - def test_diff(self): the_diff = self.tsframe.diff(1) @@ -5598,8 +5596,8 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({'a': s, 'b': s}) chg = df.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5/1.5 -1, .2]) - edf = DataFrame({'a': expected, 'b':expected}) + expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) + edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) def test_shift(self): @@ -5652,8 +5650,8 @@ def test_shift(self): self.assertRaises(ValueError, ps.shift, freq='D') def test_shift_bool(self): - df = DataFrame({'high':[True, False], - 'low':[False, False]}) + df = DataFrame({'high': [True, False], + 'low': [False, False]}) rs = df.shift(1) xp = DataFrame(np.array([[np.nan, np.nan], [True, False]], dtype=object), @@ -5708,10 +5706,11 @@ def test_apply(self): d = self.frame.index[0] applied = self.frame.apply(np.mean, axis=1) self.assertEqual(applied[d], np.mean(self.frame.xs(d))) - self.assert_(applied.index is self.frame.index) # want this + self.assert_(applied.index is self.frame.index) # want this - #invalid axis - df = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c']) + # invalid axis + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) self.assertRaises(ValueError, df.apply, lambda x: x, 2) def test_apply_empty(self): @@ -5732,13 +5731,14 @@ def test_apply_empty(self): expected = Series(np.nan, index=self.frame.index) assert_series_equal(result, expected) - #2476 + # 2476 xp = DataFrame(index=['a']) rs = xp.apply(lambda x: x['a'], axis=1) 
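        # Aside (illustrative, not part of the upstream patch): the rename
        # tests above pin down two contracts -- rename() copies by default,
        # and inplace=True mutates the caller and returns None. A minimal
        # sketch with a throwaway frame (names prefixed '_' are hypothetical):
        _ren = DataFrame({'C': [1, 2]})
        _new = _ren.rename(columns={'C': 'foo'})   # new object; _ren untouched
        assert 'C' in _ren and 'foo' in _new
        _res = _ren.rename(columns={'C': 'foo'}, inplace=True)
        assert _res is None and 'foo' in _ren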
assert_frame_equal(xp, rs) def test_apply_standard_nonunique(self): - df = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c']) + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) rs = df.apply(lambda s: s[0], axis=1) xp = Series([1, 4, 7], ['a', 'a', 'c']) assert_series_equal(rs, xp) @@ -5787,8 +5787,8 @@ def test_apply_ignore_failures(self): # test with hierarchical index def test_apply_mixed_dtype_corner(self): - df = DataFrame({'A' : ['foo'], - 'B' : [1.]}) + df = DataFrame({'A': ['foo'], + 'B': [1.]}) result = df[:0].apply(np.mean, axis=1) # the result here is actually kind of ambiguous, should it be a Series # or a DataFrame? @@ -5873,18 +5873,18 @@ def test_apply_differently_indexed(self): assert_frame_equal(result1, expected1) def test_apply_modify_traceback(self): - data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) data['C'][4] = np.nan @@ -5894,8 +5894,8 @@ def transform(row): return row def transform2(row): - if (notnull(row['C']) and row['C'].startswith('shin') - and row['A'] == 'foo'): + if (notnull(row['C']) and row['C'].startswith('shin') + and row['A'] == 'foo'): row['D'] = 7 return row @@ -5913,18 +5913,18 @@ def test_swapaxes(self): self.assertRaises(ValueError, df.swapaxes, 2, 5) def test_apply_convert_objects(self): - data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) result = data.apply(lambda x: x, axis=1) assert_frame_equal(result, data) @@ -5979,7 +5979,7 @@ def test_filter(self): self.assert_('AA' in filtered) # like with ints in column names - df = DataFrame(0., index=[0,1,2], columns=[0,1,'_A','_B']) + df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') self.assertEqual(len(filtered.columns), 2) @@ -6090,8 +6090,8 @@ def test_sort_index_multicolumn(self): B = np.tile(np.arange(5), 20) random.shuffle(A) random.shuffle(B) - frame = DataFrame({'A' : A, 'B' : B, - 'C' : np.random.randn(100)}) + frame = DataFrame({'A': A, 'B': B, + 'C': np.random.randn(100)}) result = frame.sort_index(by=['A', 'B']) indexer = np.lexsort((frame['B'], frame['A'])) @@ 
-6150,8 +6150,8 @@ def test_sort_index_different_sortorder(self): A = A.take(indexer) B = B.take(indexer) - df = DataFrame({'A' : A, 'B' : B, - 'C' : np.random.randn(100)}) + df = DataFrame({'A': A, 'B': B, + 'C': np.random.randn(100)}) result = df.sort_index(by=['A', 'B'], ascending=[1, 0]) @@ -6209,7 +6209,7 @@ def test_frame_column_inplace_sort_exception(self): self.assertRaises(Exception, s.sort) cp = s.copy() - cp.sort() # it works! + cp.sort() # it works! def test_combine_first(self): # disjoint @@ -6263,34 +6263,33 @@ def test_combine_first(self): comb = self.empty.combine_first(self.frame) assert_frame_equal(comb, self.frame) - comb = self.frame.combine_first(DataFrame(index=["faz","boo"])) + comb = self.frame.combine_first(DataFrame(index=["faz", "boo"])) self.assertTrue("faz" in comb.index) # #2525 - df = DataFrame({'a': [1]}, index=[datetime(2012,1,1)]) + df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)]) df2 = DataFrame({}, columns=['b']) result = df.combine_first(df2) self.assertTrue('b' in result) def test_combine_first_mixed_bug(self): - idx = Index(['a','b','c','e']) - ser1 = Series([5.0,-9.0,4.0,100.],index=idx) + idx = Index(['a', 'b', 'c', 'e']) + ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx) ser2 = Series(['a', 'b', 'c', 'e'], index=idx) - ser3 = Series([12,4,5,97], index=idx) - - frame1 = DataFrame({"col0" : ser1, - "col2" : ser2, - "col3" : ser3}) + ser3 = Series([12, 4, 5, 97], index=idx) - idx = Index(['a','b','c','f']) - ser1 = Series([5.0,-9.0,4.0,100.], index=idx) - ser2 = Series(['a','b','c','f'], index=idx) - ser3 = Series([12,4,5,97],index=idx) + frame1 = DataFrame({"col0": ser1, + "col2": ser2, + "col3": ser3}) - frame2 = DataFrame({"col1" : ser1, - "col2" : ser2, - "col5" : ser3}) + idx = Index(['a', 'b', 'c', 'f']) + ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx) + ser2 = Series(['a', 'b', 'c', 'f'], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + frame2 = DataFrame({"col1": ser1, + "col2": ser2, + "col5": ser3}) combined = frame1.combine_first(frame2) self.assertEqual(len(combined.columns), 5) @@ -6353,10 +6352,10 @@ def test_update_raise(self): [1.5, nan, 3]]) other = DataFrame([[2., nan], - [nan, 7]], index=[1, 3], columns=[1,2]) + [nan, 7]], index=[1, 3], columns=[1, 2]) np.testing.assert_raises(Exception, df.update, *(other,), - **{'raise_conflict' : True}) + **{'raise_conflict': True}) def test_update_from_non_df(self): d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])} @@ -6408,17 +6407,17 @@ def test_combineAdd(self): assert_frame_equal(comb, self.frame) # integer corner case - df1 = DataFrame({'x':[5]}) - df2 = DataFrame({'x':[1]}) - df3 = DataFrame({'x':[6]}) + df1 = DataFrame({'x': [5]}) + df2 = DataFrame({'x': [1]}) + df3 = DataFrame({'x': [6]}) comb = df1.combineAdd(df2) assert_frame_equal(comb, df3) # mixed type GH2191 - df1 = DataFrame({'A' : [1, 2], 'B' : [3, 4]}) - df2 = DataFrame({'A' : [1, 2], 'C' : [5, 6]}) + df1 = DataFrame({'A': [1, 2], 'B': [3, 4]}) + df2 = DataFrame({'A': [1, 2], 'C': [5, 6]}) rs = df1.combineAdd(df2) - xp = DataFrame({'A' : [2, 4], 'B' : [3, 4.], 'C' : [5, 6.]}) + xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]}) assert_frame_equal(xp, rs) # TODO: test integer fill corner? 
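# Aside (illustrative, not part of the upstream patch): combineAdd, as
# exercised just above, aligns both frames on index and columns and treats
# a value present on only one side as if the missing side were 0. The same
# result falls out of the generic arithmetic API -- a minimal sketch:
from pandas import DataFrame
from pandas.util.testing import assert_frame_equal

_df1 = DataFrame({'A': [1, 2], 'B': [3, 4]})
_df2 = DataFrame({'A': [1, 2], 'C': [5, 6]})
_expected = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]})
assert_frame_equal(_df1.add(_df2, fill_value=0), _expected)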
@@ -6468,17 +6467,17 @@ def test_get_X_columns(self): # numeric and object columns # Booleans get casted to float in DataFrame, so skip for now - df = DataFrame({'a' : [1, 2, 3], - # 'b' : [True, False, True], - 'c' : ['foo', 'bar', 'baz'], - 'd' : [None, None, None], - 'e' : [3.14, 0.577, 2.773]}) + df = DataFrame({'a': [1, 2, 3], + # 'b' : [True, False, True], + 'c': ['foo', 'bar', 'baz'], + 'd': [None, None, None], + 'e': [3.14, 0.577, 2.773]}) self.assert_(np.array_equal(df._get_numeric_data().columns, ['a', 'e'])) def test_get_numeric_data(self): - df = DataFrame({'a' : 1., 'b' : 2, 'c' : 'foo'}, + df = DataFrame({'a': 1., 'b': 2, 'c': 'foo'}, index=np.arange(10)) result = df._get_numeric_data() @@ -6526,13 +6525,13 @@ def test_sum(self): def test_stat_operators_attempt_obj_array(self): data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, - 0.00067695870775883013], + 0.00067695870775883013], 'b': [-0, -0, 0.0], 'c': [0.00031111847529610595, 0.0014902627951905339, -0.00094099200035979691] } df1 = DataFrame(data, index=['foo', 'bar', 'baz'], - dtype='O') + dtype='O') methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max'] # GH #676 @@ -6580,7 +6579,7 @@ def test_cummin(self): assert_frame_equal(cummin, expected) # works - df = DataFrame({'A' : np.arange(20)}, index=np.arange(20)) + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) result = df.cummin() # fix issue @@ -6603,14 +6602,13 @@ def test_cummax(self): assert_frame_equal(cummax, expected) # works - df = DataFrame({'A' : np.arange(20)}, index=np.arange(20)) + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) result = df.cummax() # fix issue cummax_xs = self.tsframe.cummax(axis=1) self.assertEqual(np.shape(cummax_xs), np.shape(self.tsframe)) - def test_max(self): self._check_stat_op('max', np.max) self._check_stat_op('max', np.max, frame=self.intframe) @@ -6697,7 +6695,7 @@ def wrapper(x): result1 = f(axis=1, skipna=False) assert_series_equal(result0, frame.apply(wrapper)) assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False) # HACK: win32 + check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative wrapper = alternative @@ -6743,7 +6741,7 @@ def test_sum_corner(self): def test_sum_object(self): values = self.frame.values.astype(int) frame = DataFrame(values, index=self.frame.index, - columns=self.frame.columns) + columns=self.frame.columns) deltas = frame * timedelta(1) deltas.sum() @@ -6795,7 +6793,7 @@ def test_quantile(self): self.assertEqual(q['A'], scoreatpercentile(self.intframe['A'], 10)) # test degenerate case - q = DataFrame({'x':[],'y':[]}).quantile(0.1, axis=0) + q = DataFrame({'x': [], 'y': []}).quantile(0.1, axis=0) assert(np.isnan(q['x']) and np.isnan(q['y'])) def test_cumsum(self): @@ -6814,7 +6812,7 @@ def test_cumsum(self): assert_frame_equal(cumsum, expected) # works - df = DataFrame({'A' : np.arange(20)}, index=np.arange(20)) + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) result = df.cumsum() # fix issue @@ -6855,7 +6853,7 @@ def test_rank(self): ranks0 = self.frame.rank() ranks1 = self.frame.rank(1) - mask = np.isnan(self.frame.values) + mask = np.isnan(self.frame.values) fvals = self.frame.fillna(np.inf).values @@ -6882,7 +6880,7 @@ def test_rank(self): def test_rank2(self): from datetime import datetime - df = DataFrame([['b','c','a'],['a','c','b']]) + df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) result = df.rank(1, numeric_only=False) assert_frame_equal(result, 
expected) @@ -6891,7 +6889,7 @@ def test_rank2(self): result = df.rank(0, numeric_only=False) assert_frame_equal(result, expected) - df = DataFrame([['b',np.nan,'a'],['a','c','b']]) + df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) result = df.rank(1, numeric_only=False) assert_frame_equal(result, expected) @@ -6924,7 +6922,7 @@ def test_rank_na_option(self): self.frame['C'][::4] = np.nan self.frame['D'][::5] = np.nan - #bottom + # bottom ranks0 = self.frame.rank(na_option='bottom') ranks1 = self.frame.rank(1, na_option='bottom') @@ -6936,7 +6934,7 @@ def test_rank_na_option(self): assert_almost_equal(ranks0.values, exp0) assert_almost_equal(ranks1.values, exp1) - #top + # top ranks0 = self.frame.rank(na_option='top') ranks1 = self.frame.rank(1, na_option='top') @@ -6951,9 +6949,9 @@ def test_rank_na_option(self): assert_almost_equal(ranks0.values, exp0) assert_almost_equal(ranks1.values, exp1) - #descending + # descending - #bottom + # bottom ranks0 = self.frame.rank(na_option='top', ascending=False) ranks1 = self.frame.rank(1, na_option='top', ascending=False) @@ -6965,9 +6963,9 @@ def test_rank_na_option(self): assert_almost_equal(ranks0.values, exp0) assert_almost_equal(ranks1.values, exp1) - #descending + # descending - #top + # top ranks0 = self.frame.rank(na_option='bottom', ascending=False) ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) @@ -6982,7 +6980,6 @@ def test_rank_na_option(self): assert_almost_equal(ranks0.values, exp0) assert_almost_equal(ranks1.values, exp1) - def test_describe(self): desc = self.tsframe.describe() desc = self.mixed_frame.describe() @@ -6998,21 +6995,21 @@ def test_describe_percentiles(self): assert '2.5%' in desc.index def test_describe_no_numeric(self): - df = DataFrame({'A' : ['foo', 'foo', 'bar'] * 8, - 'B' : ['a', 'b', 'c', 'd'] * 6}) + df = DataFrame({'A': ['foo', 'foo', 'bar'] * 8, + 'B': ['a', 'b', 'c', 'd'] * 6}) desc = df.describe() expected = DataFrame(dict((k, v.describe()) for k, v in df.iteritems()), columns=df.columns) assert_frame_equal(desc, expected) - df = DataFrame({'time' : self.tsframe.index}) + df = DataFrame({'time': self.tsframe.index}) desc = df.describe() assert(desc.time['first'] == min(self.tsframe.index)) def test_describe_empty_int_columns(self): df = DataFrame([[0, 1], [1, 2]]) - desc = df[df[0] < 0].describe() #works + desc = df[df[0] < 0].describe() # works assert_series_equal(desc.xs('count'), Series([0, 0], dtype=float, name='count')) self.assert_(isnull(desc.ix[1:]).all().all()) @@ -7030,13 +7027,13 @@ def test_get_axis_etc(self): self.assertRaises(Exception, f._get_axis_number, 2) def test_combine_first_mixed(self): - a = Series(['a','b'], index=range(2)) + a = Series(['a', 'b'], index=range(2)) b = Series(range(2), index=range(2)) - f = DataFrame({'A' : a, 'B' : b}) + f = DataFrame({'A': a, 'B': b}) - a = Series(['a','b'], index=range(5, 7)) + a = Series(['a', 'b'], index=range(5, 7)) b = Series(range(2), index=range(5, 7)) - g = DataFrame({'A' : a, 'B' : b}) + g = DataFrame({'A': a, 'B': b}) combined = f.combine_first(g) @@ -7046,8 +7043,8 @@ def test_more_asMatrix(self): def test_reindex_boolean(self): frame = DataFrame(np.ones((10, 2), dtype=bool), - index=np.arange(0, 20, 2), - columns=[0, 2]) + index=np.arange(0, 20, 2), + columns=[0, 2]) reindexed = frame.reindex(np.arange(10)) self.assert_(reindexed.values.dtype == np.object_) @@ -7094,7 +7091,7 @@ def test_reindex_axis(self): assert_frame_equal(newFrame, self.frame) def 
test_reindex_with_nans(self): - df = DataFrame([[1,2], [3,4], [np.nan,np.nan], [7,8], [9,10]], + df = DataFrame([[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]], columns=['a', 'b'], index=[100.0, 101.0, np.nan, 102.0, 103.0]) @@ -7132,9 +7129,9 @@ def test_reindex_multi(self): assert_frame_equal(result, expected) - df = DataFrame(np.random.randn(5, 3) + 1j, columns=['a','b','c']) + df = DataFrame(np.random.randn(5, 3) + 1j, columns=['a', 'b', 'c']) - result = df.reindex(index=[0,1], columns=['a', 'b']) + result = df.reindex(index=[0, 1], columns=['a', 'b']) expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) assert_frame_equal(result, expected) @@ -7164,7 +7161,7 @@ def test_count_objects(self): def test_cumsum_corner(self): dm = DataFrame(np.arange(20).reshape(4, 5), - index=range(4), columns=range(5)) + index=range(4), columns=range(5)) result = dm.cumsum() #---------------------------------------------------------------------- @@ -7172,7 +7169,7 @@ def test_cumsum_corner(self): def test_stack_unstack(self): stacked = self.frame.stack() - stacked_df = DataFrame({'foo' : stacked, 'bar' : stacked}) + stacked_df = DataFrame({'foo': stacked, 'bar': stacked}) unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() @@ -7207,12 +7204,12 @@ def test_unstack_to_series(self): # check NA handling data = DataFrame({'x': [1, 2, np.NaN], 'y': [3.0, 4, np.NaN]}) - data.index = Index(['a','b','c']) + data.index = Index(['a', 'b', 'c']) result = data.unstack() - midx = MultiIndex(levels=[['x','y'],['a','b','c']], - labels=[[0,0,0,1,1,1],[0,1,2,0,1,2]]) - expected = Series([1,2,np.NaN,3,4,np.NaN], index=midx) + midx = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']], + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) assert_series_equal(result, expected) @@ -7224,7 +7221,7 @@ def test_unstack_to_series(self): def test_reset_index(self): stacked = self.frame.stack()[::2] - stacked = DataFrame({'foo' : stacked, 'bar' : stacked}) + stacked = DataFrame({'foo': stacked, 'bar': stacked}) names = ['first', 'second'] stacked.index.names = names @@ -7280,7 +7277,7 @@ def test_reset_index(self): xp = self.frame.reset_index().set_index(['index', 'B']) assert_frame_equal(rs, xp) - #test resetting in place + # test resetting in place df = self.frame.copy() resetted = self.frame.reset_index() res = df.reset_index(inplace=True) @@ -7295,8 +7292,8 @@ def test_reset_index(self): assert_frame_equal(rs, xp) def test_reset_index_right_dtype(self): - time = np.arange(0.0, 10, np.sqrt(2)/2) - s1 = Series((9.81 * time ** 2) /2, + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series((9.81 * time ** 2) / 2, index=Index(time, name='time'), name='speed') df = DataFrame(s1) @@ -7350,10 +7347,8 @@ def test_reset_index_multiindex_col(self): ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) - #---------------------------------------------------------------------- # Tests to cope with refactored internals - def test_as_matrix_numeric_cols(self): self.frame['foo'] = 'bar' @@ -7379,7 +7374,7 @@ def test_constructor_ndarray_copy(self): def test_constructor_series_copy(self): series = self.frame._series - df = DataFrame({'A' : series['A']}) + df = DataFrame({'A': series['A']}) df['A'][:] = 5 self.assert_(not (series['A'] == 5).all()) @@ -7479,9 +7474,9 @@ def test_boolean_indexing(self): data=np.ones((len(idx), len(cols)))) expected = DataFrame(index=idx, columns=cols, - data=np.array([[0.0, 0.5, 1.0], - [1.5, 2.0, -1], - [-1, -1, -1]], 
dtype=float)) + data=np.array([[0.0, 0.5, 1.0], + [1.5, 2.0, -1], + [-1, -1, -1]], dtype=float)) df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) @@ -7535,12 +7530,12 @@ def test_dot(self): expected = DataFrame(np.dot(a.values, b.values), index=['a', 'b', 'c'], columns=['one', 'two']) - #Check alignment + # Check alignment b1 = b.reindex(index=reversed(b.index)) result = a.dot(b) assert_frame_equal(result, expected) - #Check series argument + # Check series argument result = a.dot(b['one']) assert_series_equal(result, expected['one']) result = a.dot(b1['one']) @@ -7555,8 +7550,8 @@ def test_dot(self): self.assertRaises(Exception, a.dot, row[:-1]) - a = np.random.rand(1,5) - b = np.random.rand(5,1) + a = np.random.rand(1, 5) + b = np.random.rand(5, 1) A = DataFrame(a) B = DataFrame(b) @@ -7577,7 +7572,8 @@ def test_idxmin(self): for axis in [0, 1]: for df in [frame, self.intframe]: result = df.idxmin(axis=axis, skipna=skipna) - expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + expected = df.apply( + Series.idxmin, axis=axis, skipna=skipna) assert_series_equal(result, expected) self.assertRaises(Exception, frame.idxmin, axis=2) @@ -7590,14 +7586,15 @@ def test_idxmax(self): for axis in [0, 1]: for df in [frame, self.intframe]: result = df.idxmax(axis=axis, skipna=skipna) - expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + expected = df.apply( + Series.idxmax, axis=axis, skipna=skipna) assert_series_equal(result, expected) self.assertRaises(Exception, frame.idxmax, axis=2) def test_stale_cached_series_bug_473(self): - Y = DataFrame(np.random.random((4, 4)), index=('a', 'b','c','d'), - columns=('e','f','g','h')) + Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), + columns=('e', 'f', 'g', 'h')) repr(Y) Y['e'] = Y['e'].astype('object') Y['g']['c'] = np.NaN @@ -7626,7 +7623,6 @@ def test_any_all(self): self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True) self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True) - df = DataFrame(randn(10, 4)) > 0 df.any(1) df.all(1) @@ -7664,7 +7660,7 @@ def test_consolidate_datetime64(self): 2012-06-25 08:00,2012-06-26 12:00,0 2012-06-26 12:00,2012-06-27 08:00,77 """ - df = read_csv(StringIO(data), parse_dates=[0,1]) + df = read_csv(StringIO(data), parse_dates=[0, 1]) ser_starting = df.starting ser_starting.index = ser_starting.values @@ -7706,7 +7702,7 @@ def wrapper(x): result1 = f(axis=1, skipna=False) assert_series_equal(result0, frame.apply(wrapper)) assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False) # HACK: win32 + check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative wrapper = alternative @@ -7755,11 +7751,11 @@ def __nonzero__(self): self.assert_(r1.all()) def test_strange_column_corruption_issue(self): - df = DataFrame(index=[0,1]) + df = DataFrame(index=[0, 1]) df[0] = nan wasCol = {} # uncommenting these makes the results match - #for col in xrange(100, 200): + # for col in xrange(100, 200): # wasCol[col] = 1 # df[col] = nan @@ -7782,5 +7778,5 @@ def test_strange_column_corruption_issue(self): import nose # nose.runmodule(argv=[__file__,'-vvs','-x', '--ipdb-failure'], # exit=False) - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 79f264840f37c..c3c4ddc1614b3 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py 
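# Reviewer aside (not part of the patch): as in test_frame.py above, every
# hunk in this file is formatting-only PEP 8 cleanup -- no space before ':'
# in dict literals, spaces after commas, '# ' comment prefixes, rewrapped
# long lines, and two blank lines between top-level definitions. The
# dominant pattern, before and after, is behavior-preserving:
from pandas import DataFrame
from pandas.util.testing import assert_frame_equal

_before = DataFrame({'x' : [1,2], 'y' : [3,4]})    # old spelling
_after = DataFrame({'x': [1, 2], 'y': [3, 4]})     # normalized spelling
assert_frame_equal(_before, _after)                # identical frames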
@@ -61,7 +61,7 @@ def test_plot(self): _check_plot_works(self.series[:5].plot, kind='barh') _check_plot_works(self.series[:10].plot, kind='barh') - Series(np.random.randn(10)).plot(kind='bar',color='black') + Series(np.random.randn(10)).plot(kind='bar', color='black') @slow def test_bar_colors(self): @@ -128,7 +128,7 @@ def test_rotation(self): @slow def test_irregular_datetime(self): rng = date_range('1/1/2000', '3/1/2000') - rng = rng[[0,1,2,3,5,9,10,11,12]] + rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] ser = Series(np.random.randn(len(rng)), rng) ax = ser.plot() xp = datetime(1999, 1, 1).toordinal() @@ -166,12 +166,13 @@ def test_bootstrap_plot(self): from pandas.tools.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, self.ts, size=10) + class TestDataFramePlots(unittest.TestCase): @classmethod def setUpClass(cls): - #import sys - #if 'IPython' in sys.modules: + # import sys + # if 'IPython' in sys.modules: # raise nose.SkipTest try: @@ -187,7 +188,7 @@ def test_plot(self): _check_plot_works(df.plot, subplots=True) _check_plot_works(df.plot, subplots=True, use_index=False) - df = DataFrame({'x':[1,2], 'y':[3,4]}) + df = DataFrame({'x': [1, 2], 'y': [3, 4]}) self._check_plot_fails(df.plot, kind='line', blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -215,7 +216,7 @@ def test_plot(self): (u'\u03b4', 6), (u'\u03b4', 7)], names=['i0', 'i1']) columns = MultiIndex.from_tuples([('bar', u'\u0394'), - ('bar', u'\u0395')], names=['c0', 'c1']) + ('bar', u'\u0395')], names=['c0', 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) @@ -265,7 +266,6 @@ def test_plot_xy(self): # columns.inferred_type == 'mixed' # TODO add MultiIndex test - @slow def test_xcompat(self): import pandas as pd @@ -289,7 +289,7 @@ def test_xcompat(self): self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) plt.close('all') - #useful if you're plotting a bunch together + # useful if you're plotting a bunch together with pd.plot_params.use('x_compat', True): ax = df.plot() lines = ax.get_lines() @@ -361,15 +361,15 @@ def test_plot_bar(self): @slow def test_bar_stacked_center(self): - #GH2157 - df = DataFrame({'A' : [3] * 5, 'B' : range(5)}, index = range(5)) + # GH2157 + df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) ax = df.plot(kind='bar', stacked='True', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width() / 2) @slow def test_bar_center(self): - df = DataFrame({'A' : [3] * 5, 'B' : range(5)}, index = range(5)) + df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) ax = df.plot(kind='bar', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width()) @@ -395,8 +395,8 @@ def test_boxplot(self): _check_plot_works(df.boxplot, notch=1) _check_plot_works(df.boxplot, by='indic', notch=1) - df = DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] ) - df['X'] = Series(['A','A','A','A','A','B','B','B','B','B']) + df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) + df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) _check_plot_works(df.boxplot, by='X') @slow @@ -415,7 +415,7 @@ def test_hist(self): _check_plot_works(df.hist) _check_plot_works(df.hist, grid=False) - #make sure layout is handled + # make sure layout is handled df = DataFrame(np.random.randn(100, 3)) _check_plot_works(df.hist) axes = df.hist(grid=False) @@ -424,14 +424,14 @@ def test_hist(self): df = DataFrame(np.random.randn(100, 1)) 
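        # Aside (illustrative, not part of the upstream patch): every
        # _check_plot_works call in this file is a smoke test -- invoke the
        # plotting callable, write the figure to disk, delete the file -- so
        # the only assertion is "it does not raise". The same pattern as a
        # self-contained sketch (the helper name here is hypothetical):
        def _smoke_plot(f, *args, **kwargs):
            import matplotlib.pyplot as plt
            f(*args, **kwargs)           # must not raise
            plt.savefig('tmp.png')       # force the figure to render
            os.remove('tmp.png')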
_check_plot_works(df.hist) - #make sure layout is handled + # make sure layout is handled df = DataFrame(np.random.randn(100, 6)) _check_plot_works(df.hist) - #make sure sharex, sharey is handled + # make sure sharex, sharey is handled _check_plot_works(df.hist, sharex=True, sharey=True) - #make sure kwargs are handled + # make sure kwargs are handled ser = df[0] xf, yf = 20, 20 xrot, yrot = 30, 30 @@ -461,6 +461,7 @@ def test_scatter(self): df = DataFrame(np.random.randn(100, 4)) import pandas.tools.plotting as plt + def scat(**kwds): return plt.scatter_matrix(df, **kwds) _check_plot_works(scat) @@ -501,7 +502,8 @@ def test_parallel_coordinates(self): _check_plot_works(parallel_coordinates, df, 'Name', colors=['dodgerblue', 'aquamarine', 'seagreen']) - df = read_csv(path, header=None, skiprows=1, names=[1,2,4,8, 'Name']) + df = read_csv( + path, header=None, skiprows=1, names=[1, 2, 4, 8, 'Name']) _check_plot_works(parallel_coordinates, df, 'Name', use_columns=True) _check_plot_works(parallel_coordinates, df, 'Name', xticks=[1, 5, 25, 125]) @@ -604,8 +606,8 @@ class TestDataFrameGroupByPlots(unittest.TestCase): @classmethod def setUpClass(cls): - #import sys - #if 'IPython' in sys.modules: + # import sys + # if 'IPython' in sys.modules: # raise nose.SkipTest try: @@ -616,8 +618,8 @@ def setUpClass(cls): @slow def test_boxplot(self): - df = DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] ) - df['X'] = Series(['A','A','A','A','A','B','B','B','B','B']) + df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) + df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) grouped = df.groupby(by='X') _check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) @@ -632,8 +634,6 @@ def test_boxplot(self): _check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) - - @slow def test_series_plot_color_kwargs(self): # #1890 @@ -650,7 +650,8 @@ def test_time_series_plot_color_kwargs(self): import matplotlib.pyplot as plt plt.close('all') - ax = Series(np.arange(12) + 1, index=date_range('1/1/2000', periods=12)).plot(color='green') + ax = Series(np.arange(12) + 1, index=date_range( + '1/1/2000', periods=12)).plot(color='green') line = ax.get_lines()[0] self.assert_(line.get_color() == 'green') @@ -672,6 +673,7 @@ def test_grouped_hist(self): PNG_PATH = 'tmp.png' + def _check_plot_works(f, *args, **kwargs): import matplotlib.pyplot as plt @@ -691,10 +693,11 @@ def _check_plot_works(f, *args, **kwargs): plt.savefig(PNG_PATH) os.remove(PNG_PATH) + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index ec7f5421a0aa9..b9ffcc553bca6 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -24,12 +24,13 @@ import pandas.util.testing as tm + def commonSetUp(self): self.dateRange = bdate_range('1/1/2005', periods=250) self.stringIndex = Index([rands(8).upper() for x in xrange(250)]) self.groupId = Series([x[0] for x in self.stringIndex], - index=self.stringIndex) + index=self.stringIndex) self.groupDict = dict((k, v) for k, v in self.groupId.iteritems()) self.columnIndex = Index(['A', 'B', 'C', 'D', 'E']) @@ -41,6 +42,7 @@ def commonSetUp(self): self.timeMatrix = DataFrame(randMat, columns=self.columnIndex, index=self.dateRange) + 
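# Aside (illustrative, not part of the upstream patch): commonSetUp above
# keys self.groupId off the first character of a random string index. Any
# callable applied to the index values works as a grouper the same way --
# a minimal sketch with throwaway data:
from pandas import Index, Series

_s = Series([1., 2., 3., 4.],
            index=Index(['apple', 'avocado', 'banana', 'berry']))
_by_first = _s.groupby(lambda label: label[0]).mean()
# _by_first is indexed by 'a' and 'b' with means 1.5 and 3.5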
class TestGroupBy(unittest.TestCase): _multiprocess_can_split_ = True @@ -53,12 +55,12 @@ def setUp(self): self.frame = DataFrame(self.seriesd) self.tsframe = DataFrame(self.tsd) - self.df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B' : ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C' : np.random.randn(8), - 'D' : np.random.randn(8)}) + self.df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], @@ -68,18 +70,18 @@ def setUp(self): self.mframe = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) - self.three_group = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + self.three_group = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) def test_basic(self): data = Series(np.arange(9) // 3, index=np.arange(9)) @@ -96,7 +98,7 @@ def test_basic(self): agged = grouped.aggregate(np.mean) self.assertEqual(agged[1], 1) - assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + assert_series_equal(agged, grouped.agg(np.mean)) # shorthand assert_series_equal(agged, grouped.mean()) # Cython only returning floating point for now... @@ -111,13 +113,13 @@ def test_basic(self): # complex agg agged = grouped.aggregate([np.mean, np.std]) - agged = grouped.aggregate({'one' : np.mean, - 'two' : np.std}) + agged = grouped.aggregate({'one': np.mean, + 'two': np.std}) group_constants = { - 0 : 10, - 1 : 20, - 2 : 30 + 0: 10, + 1: 20, + 2: 30 } agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) self.assertEqual(agged[1], 21) @@ -176,7 +178,7 @@ def test_groupby_dict_mapping(self): assert_series_equal(result, expected) s = Series([1., 2., 3., 4.], index=list('abcd')) - mapping = {'a' : 0, 'b' : 0, 'c' : 1, 'd' : 1} + mapping = {'a': 0, 'b': 0, 'c': 1, 'd': 1} result = s.groupby(mapping).mean() result2 = s.groupby(mapping).agg(np.mean) @@ -216,10 +218,10 @@ def test_agg_datetimes_mixed(self): 'date': [x[1] for x in data], 'value': [x[2] for x in data]}) - df1['weights'] = df1['value']/df1['value'].sum() + df1['weights'] = df1['value'] / df1['value'].sum() gb1 = df1.groupby('date').aggregate(np.sum) - df2['weights'] = df1['value']/df1['value'].sum() + df2['weights'] = df1['value'] / df1['value'].sum() gb2 = df2.groupby('date').aggregate(np.sum) assert(len(gb1) == len(gb2)) @@ -294,7 +296,7 @@ def test_agg_python_multiindex(self): def test_apply_describe_bug(self): grouped = self.mframe.groupby(level='first') - result = grouped.describe() # it works! + result = grouped.describe() # it works! 
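    # Aside (illustrative, not part of the upstream patch): test_basic above
    # exercises three aggregation spellings -- one function, a list of
    # functions (one output column each), and a lambda where x.name carries
    # the group key. A compact restatement (this helper is hypothetical and
    # not an upstream test):
    def _sketch_agg_spellings(self):
        s = Series(np.arange(9) // 3)
        g = s.groupby(s.values)
        one = g.agg(np.mean)                   # one scalar per group
        many = g.agg([np.mean, np.std])        # columns 'mean' and 'std'
        keyed = g.agg(lambda x: {0: 10, 1: 20, 2: 30}[x.name] + x.mean())
        return one, many, keyed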
def test_len(self): df = tm.makeTimeDataFrame() @@ -311,20 +313,21 @@ def test_len(self): def test_groups(self): grouped = self.df.groupby(['A']) groups = grouped.groups - self.assert_(groups is grouped.groups) # caching works + self.assert_(groups is grouped.groups) # caching works for k, v in grouped.groups.iteritems(): self.assert_((self.df.ix[v]['A'] == k).all()) grouped = self.df.groupby(['A', 'B']) groups = grouped.groups - self.assert_(groups is grouped.groups) # caching works + self.assert_(groups is grouped.groups) # caching works for k, v in grouped.groups.iteritems(): self.assert_((self.df.ix[v]['A'] == k[0]).all()) self.assert_((self.df.ix[v]['B'] == k[1]).all()) def test_aggregate_str_func(self): from pandas.util.compat import OrderedDict + def _check_results(grouped): # single series result = grouped['A'].agg('std') @@ -337,10 +340,11 @@ def _check_results(grouped): assert_frame_equal(result, expected) # group frame by function dict - result = grouped.agg(OrderedDict([['A' , 'var'], ['B' , 'std'], ['C' , 'mean']])) + result = grouped.agg( + OrderedDict([['A', 'var'], ['B', 'std'], ['C', 'mean']])) expected = DataFrame(OrderedDict([['A', grouped['A'].var()], - ['B', grouped['B'].std()], - ['C', grouped['C'].mean()]])) + ['B', grouped['B'].std()], + ['C', grouped['C'].mean()]])) assert_frame_equal(result, expected) by_weekday = self.tsframe.groupby(lambda x: x.weekday()) @@ -355,6 +359,7 @@ def test_aggregate_item_by_item(self): df = self.df.copy() df['E'] = ['a'] * len(self.df) grouped = self.df.groupby('A') + def aggfun(ser): return len(ser + 'a') result = grouped.agg(aggfun) @@ -376,7 +381,7 @@ def aggfun(ser): def test_basic_regression(self): # regression - T = [1.0*x for x in range(1,10) *10][:1095] + T = [1.0 * x for x in range(1, 10) * 10][:1095] result = Series(T, range(0, len(T))) groupings = np.random.random((1100,)) @@ -415,7 +420,7 @@ def test_transform_broadcast(self): assert_fp_equal(res[col], agged[col]) # group columns - grouped = self.tsframe.groupby({'A' : 0, 'B' : 0, 'C' : 1, 'D' : 1}, + grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis=1) result = grouped.transform(np.mean) self.assert_(result.index.equals(self.tsframe.index)) @@ -530,18 +535,18 @@ def test_series_agg_multikey(self): assert_series_equal(result, expected) def test_series_agg_multi_pure_python(self): - data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) def bad(x): assert(len(x.base) > 0) @@ -565,8 +570,8 @@ def test_frame_describe_multikey(self): expected = grouped[col].describe() assert_series_equal(result[col], expected) - groupedT = self.tsframe.groupby({'A' : 0, 'B' : 0, - 'C' : 1, 'D' : 1}, axis=1) + groupedT = self.tsframe.groupby({'A': 0, 'B': 0, + 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() for name, group in groupedT: @@ -622,7 +627,7 
@@ def test_grouping_is_iterable(self): def test_frame_groupby_columns(self): mapping = { - 'A' : 0, 'B' : 0, 'C' : 1, 'D' : 1 + 'A': 0, 'B': 0, 'C': 1, 'D': 1 } grouped = self.tsframe.groupby(mapping, axis=1) @@ -652,7 +657,7 @@ def test_frame_set_name_single(self): result = grouped.agg(np.mean) self.assert_(result.index.name == 'A') - result = grouped.agg({'C' : np.mean, 'D' : np.std}) + result = grouped.agg({'C': np.mean, 'D': np.std}) self.assert_(result.index.name == 'A') result = grouped['C'].mean() @@ -662,7 +667,7 @@ def test_frame_set_name_single(self): result = grouped['C'].agg([np.mean, np.std]) self.assert_(result.index.name == 'A') - result = grouped['C'].agg({'foo' : np.mean, 'bar' : np.std}) + result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) self.assert_(result.index.name == 'A') def test_multi_iter(self): @@ -686,9 +691,9 @@ def test_multi_iter(self): def test_multi_iter_frame(self): k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a']) k2 = np.array(['1', '2', '1', '2', '1', '2']) - df = DataFrame({'v1' : np.random.randn(6), - 'v2' : np.random.randn(6), - 'k1' : k1, 'k2' : k2}, + df = DataFrame({'v1': np.random.randn(6), + 'v2': np.random.randn(6), + 'k1': k1, 'k2': k2}, index=['one', 'two', 'three', 'four', 'five', 'six']) grouped = df.groupby(['k1', 'k2']) @@ -743,10 +748,10 @@ def test_multi_func(self): expected.ix[:, ['C', 'D']]) # some "groups" with no data - df = DataFrame({'v1' : np.random.randn(6), - 'v2' : np.random.randn(6), - 'k1' : np.array(['b', 'b', 'b', 'a', 'a', 'a']), - 'k2' : np.array(['1', '1', '1', '2', '2', '2'])}, + df = DataFrame({'v1': np.random.randn(6), + 'v2': np.random.randn(6), + 'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']), + 'k2': np.array(['1', '1', '1', '2', '2', '2'])}, index=['one', 'two', 'three', 'four', 'five', 'six']) # only verify that it works for now grouped = df.groupby(['k1', 'k2']) @@ -756,23 +761,23 @@ def test_multi_key_multiple_functions(self): grouped = self.df.groupby(['A', 'B'])['C'] agged = grouped.agg([np.mean, np.std]) - expected = DataFrame({'mean' : grouped.agg(np.mean), - 'std' : grouped.agg(np.std)}) + expected = DataFrame({'mean': grouped.agg(np.mean), + 'std': grouped.agg(np.std)}) assert_frame_equal(agged, expected) def test_frame_multi_key_function_list(self): - data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) grouped = data.groupby(['A', 'B']) funcs = [np.mean, np.std] @@ -827,15 +832,15 @@ def test_groupby_as_index_agg(self): expected = grouped.mean() assert_frame_equal(result, expected) - result2 = grouped.agg(OrderedDict([['C' , np.mean], ['D' , np.sum]])) + result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]])) expected2 = grouped.mean() expected2['D'] = grouped.sum()['D'] assert_frame_equal(result2, expected2) grouped = self.df.groupby('A', as_index=True) expected3 = 
grouped['C'].sum()
-        expected3 = DataFrame(expected3).rename(columns={'C' : 'Q'})
-        result3 = grouped['C'].agg({'Q' : np.sum})
+        expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+        result3 = grouped['C'].agg({'Q': np.sum})
         assert_frame_equal(result3, expected3)

         # multi-key

@@ -846,14 +851,14 @@ def test_groupby_as_index_agg(self):
         expected = grouped.mean()
         assert_frame_equal(result, expected)

-        result2 = grouped.agg(OrderedDict([['C' , np.mean], ['D' , np.sum]]))
+        result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
         expected2 = grouped.mean()
         expected2['D'] = grouped.sum()['D']
         assert_frame_equal(result2, expected2)

         expected3 = grouped['C'].sum()
-        expected3 = DataFrame(expected3).rename(columns={'C' : 'Q'})
-        result3 = grouped['C'].agg({'Q' : np.sum})
+        expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+        result3 = grouped['C'].agg({'Q': np.sum})
         assert_frame_equal(result3, expected3)

     def test_multifunc_select_col_integer_cols(self):
@@ -861,7 +866,7 @@ def test_multifunc_select_col_integer_cols(self):
         df.columns = np.arange(len(df.columns))

         # it works!
-        result = df.groupby(1, as_index=False)[2].agg({'Q' : np.mean})
+        result = df.groupby(1, as_index=False)[2].agg({'Q': np.mean})

     def test_as_index_series_return_frame(self):
         grouped = self.df.groupby('A', as_index=False)
@@ -978,7 +983,7 @@ def test_omit_nuisance(self):
         assert_frame_equal(result, expected)

         # won't work with axis = 1
-        grouped = df.groupby({'A' : 0, 'C' : 0, 'D' : 1, 'E' : 1}, axis=1)
+        grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1)
         result = self.assertRaises(TypeError, grouped.agg,
                                    lambda x: x.sum(1, numeric_only=False))

@@ -991,11 +996,11 @@ def test_omit_nuisance_python_multiple(self):

     def test_empty_groups_corner(self):
         # handle empty groups
-        df = DataFrame({'k1' : np.array(['b', 'b', 'b', 'a', 'a', 'a']),
-                        'k2' : np.array(['1', '1', '1', '2', '2', '2']),
-                        'k3' : ['foo', 'bar'] * 3,
-                        'v1' : np.random.randn(6),
-                        'v2' : np.random.randn(6)})
+        df = DataFrame({'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']),
+                        'k2': np.array(['1', '1', '1', '2', '2', '2']),
+                        'k3': ['foo', 'bar'] * 3,
+                        'v1': np.random.randn(6),
+                        'v2': np.random.randn(6)})

         grouped = df.groupby(['k1', 'k2'])
         result = grouped.agg(np.mean)
@@ -1047,9 +1052,9 @@ def test_nonsense_func(self):
         self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')

     def test_cythonized_aggers(self):
-        data = {'A' : [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
-                'B' : ['A', 'B'] * 6,
-                'C' : np.random.randn(12)}
+        data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
+                'B': ['A', 'B'] * 6,
+                'C': np.random.randn(12)}
         df = DataFrame(data)
         df['C'][2:10:2] = nan

@@ -1059,7 +1064,7 @@ def _testit(op):
             exp = {}
             for cat, group in grouped:
                 exp[cat] = op(group['C'])
-            exp = DataFrame({'C' : exp})
+            exp = DataFrame({'C': exp})
             result = op(grouped)
             assert_frame_equal(result, exp)

@@ -1097,7 +1102,7 @@ def test_cython_agg_nothing_to_agg(self):

     def test_cython_agg_frame_columns(self):
         # #2113
-        df = DataFrame({'x': [1,2,3], 'y': [3,4,5]})
+        df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

         result = df.groupby(level=0, axis='columns').mean()
         result = df.groupby(level=0, axis='columns').mean()
@@ -1193,9 +1198,9 @@ def test_groupby_level_mapper(self):
         frame = self.mframe
         deleveled = frame.reset_index()

-        mapper0 = {'foo' : 0, 'bar' : 0,
-                   'baz' : 1, 'qux' : 1}
-        mapper1 = {'one' : 0, 'two' : 0, 'three' : 1}
+        mapper0 = {'foo': 0, 'bar': 0,
+                   'baz': 1, 'qux': 1}
+        mapper1 = {'one': 0, 'two': 0, 'three': 1}

         result0 = frame.groupby(mapper0, level=0).sum()
         result1 = frame.groupby(mapper1, level=1).sum()

@@ -1210,7 +1215,8 @@ def test_groupby_level_mapper(self):

     def test_groupby_level_0_nonmulti(self):
         # #1313
-        a = Series([1,2,3,10,4,5,20,6], Index([1,2,3,1,4,5,2,6], name='foo'))
+        a = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1,
+                                                      4, 5, 2, 6], name='foo'))
         result = a.groupby(level=0).sum()
         self.assertEquals(result.index.name, a.index.name)

@@ -1236,9 +1242,9 @@ def test_cython_fail_agg(self):

     def test_apply_series_to_frame(self):
         def f(piece):
-            return DataFrame({'value' : piece,
-                              'demeaned' : piece - piece.mean(),
-                              'logged' : np.log(piece)})
+            return DataFrame({'value': piece,
+                              'demeaned': piece - piece.mean(),
+                              'logged': np.log(piece)})

         dr = bdate_range('1/1/2000', periods=100)
         ts = Series(np.random.randn(100), index=dr)
@@ -1315,10 +1321,10 @@ def test_apply_no_name_column_conflict(self):
         grouped.apply(lambda x: x.sort('value'))

     def test_groupby_series_indexed_differently(self):
-        s1 = Series([5.0,-9.0,4.0,100.,-5.,55.,6.7],
-                    index=Index(['a','b','c','d','e','f','g']))
-        s2 = Series([1.0,1.0,4.0,5.0,5.0,7.0],
-                    index=Index(['a','b','d','f','g','h']))
+        s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7],
+                    index=Index(['a', 'b', 'c', 'd', 'e', 'f', 'g']))
+        s2 = Series([1.0, 1.0, 4.0, 5.0, 5.0, 7.0],
+                    index=Index(['a', 'b', 'd', 'f', 'g', 'h']))

         grouped = s1.groupby(s2)
         agged = grouped.mean()
@@ -1402,7 +1408,7 @@ def f(x, q=None):
#     values = np.random.randn(10)
#     shape = (5, 5)
#     label_list = [np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2], dtype=np.int32),
-#                    np.array([1, 2, 3, 4, 0, 1, 2, 3, 3, 4], dtype=np.int32)]
+#                   np.array([1, 2, 3, 4, 0, 1, 2, 3, 3, 4], dtype=np.int32)]
#     lib.group_aggregate(values, label_list, shape)

@@ -1430,9 +1436,9 @@ def test_grouping_ndarray(self):
         assert_frame_equal(result, expected)

     def test_apply_typecast_fail(self):
-        df = DataFrame({'d' : [1.,1.,1.,2.,2.,2.],
-                        'c' : np.tile(['a','b','c'], 2),
-                        'v' : np.arange(1., 7.)})
+        df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
+                        'c': np.tile(['a', 'b', 'c'], 2),
+                        'v': np.arange(1., 7.)})

         def f(group):
             v = group['v']
@@ -1449,9 +1455,9 @@ def f(group):
     def test_apply_multiindex_fail(self):
         index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                         [1, 2, 3, 1, 2, 3]])
-        df = DataFrame({'d' : [1.,1.,1.,2.,2.,2.],
-                        'c' : np.tile(['a','b','c'], 2),
-                        'v' : np.arange(1., 7.)}, index=index)
+        df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
+                        'c': np.tile(['a', 'b', 'c'], 2),
+                        'v': np.arange(1., 7.)}, index=index)

         def f(group):
             v = group['v']
@@ -1502,9 +1508,9 @@ def f(g):
     def test_transform_mixed_type(self):
         index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                         [1, 2, 3, 1, 2, 3]])
-        df = DataFrame({'d' : [1.,1.,1.,2.,2.,2.],
-                        'c' : np.tile(['a','b','c'], 2),
-                        'v' : np.arange(1., 7.)}, index=index)
+        df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
+                        'c': np.tile(['a', 'b', 'c'], 2),
+                        'v': np.arange(1., 7.)}, index=index)

         def f(group):
             group['g'] = group['d'] * 2
@@ -1544,7 +1550,7 @@ def test_groupby_series_with_name(self):
         result = self.df.groupby([self.df['A'], self.df['B']]).mean()
         result2 = self.df.groupby([self.df['A'], self.df['B']],
-                                  as_index=False).mean()
+                                   as_index=False).mean()
         self.assertEquals(result.index.names, ['A', 'B'])
         self.assert_('A' in result2)
         self.assert_('B' in result2)
@@ -1611,8 +1617,8 @@ def test_groupby_list_infer_array_like(self):
         self.assertRaises(Exception, self.df.groupby, list(self.df['A'][:-1]))

         # pathological case of ambiguity
-        df = DataFrame({'foo' : [0, 1], 'bar' : [3, 4],
-                        'val' : np.random.randn(2)})
+        df = DataFrame({'foo': [0, 1], 'bar': [3, 4],
+                        'val': np.random.randn(2)})

         result = df.groupby(['foo', 'bar']).mean()
         expected = df.groupby([df['foo'], df['bar']]).mean()[['val']]
@@ -1646,7 +1652,7 @@ def _check_work(gp):
     def test_panel_groupby(self):
         self.panel = tm.makePanel()
         tm.add_nans(self.panel)
-        grouped = self.panel.groupby({'ItemA' : 0, 'ItemB' : 0, 'ItemC' : 1},
+        grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1},
                                      axis='items')
         agged = grouped.mean()
         agged2 = grouped.agg(lambda x: x.mean('items'))
@@ -1660,7 +1666,7 @@ def test_panel_groupby(self):

         self.assert_(np.array_equal(agged.major_axis, [1, 2]))

-        grouped = self.panel.groupby({'A' : 0, 'B' : 0, 'C' : 1, 'D' : 1},
+        grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
                                      axis='minor')
         agged = grouped.mean()
         self.assert_(np.array_equal(agged.minor_axis, [0, 1]))
@@ -1696,9 +1702,9 @@ def test_int32_overflow(self):
         B = np.concatenate((np.arange(10000), np.arange(10000),
                             np.arange(5000)))
         A = np.arange(25000)
-        df = DataFrame({'A' : A, 'B' : B,
-                        'C' : A, 'D' : B,
-                        'E' : np.random.randn(25000)})
+        df = DataFrame({'A': A, 'B': B,
+                        'C': A, 'D': B,
+                        'E': np.random.randn(25000)})

         left = df.groupby(['A', 'B', 'C', 'D']).sum()
         right = df.groupby(['D', 'C', 'B', 'A']).sum()
@@ -1708,11 +1714,11 @@ def test_int64_overflow(self):
         B = np.concatenate((np.arange(1000), np.arange(1000),
                             np.arange(500)))
         A = np.arange(2500)
-        df = DataFrame({'A' : A, 'B' : B,
-                        'C' : A, 'D' : B,
-                        'E' : A, 'F' : B,
-                        'G' : A, 'H' : B,
-                        'values' : np.random.randn(2500)})
+        df = DataFrame({'A': A, 'B': B,
+                        'C': A, 'D': B,
+                        'E': A, 'F': B,
+                        'G': A, 'H': B,
+                        'values': np.random.randn(2500)})

         lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'])
         rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A'])
@@ -1736,10 +1742,10 @@ def test_int64_overflow(self):
         self.assert_(len(left) == len(right))

     def test_groupby_sort_multi(self):
-        df = DataFrame({'a' : ['foo', 'bar', 'baz'],
-                        'b' : [3, 2, 1],
-                        'c' : [0, 1, 2],
-                        'd' : np.random.randn(3)})
+        df = DataFrame({'a': ['foo', 'bar', 'baz'],
+                        'b': [3, 2, 1],
+                        'c': [0, 1, 2],
+                        'd': np.random.randn(3)})

         tups = map(tuple, df[['a', 'b', 'c']].values)
         tups = com._asarray_tuplesafe(tups)
@@ -1758,9 +1764,9 @@ def test_groupby_sort_multi(self):
         self.assert_(np.array_equal(result.index.values,
                                     tups[[2, 1, 0]]))

-        df = DataFrame({'a' : [0, 1, 2, 0, 1, 2],
-                        'b' : [0, 0, 0, 1, 1, 1],
-                        'd' : np.random.randn(6)})
+        df = DataFrame({'a': [0, 1, 2, 0, 1, 2],
+                        'b': [0, 0, 0, 1, 1, 1],
+                        'd': np.random.randn(6)})
         grouped = df.groupby(['a', 'b'])['d']
         result = grouped.sum()
         _check_groupby(df, result, ['a', 'b'], 'd')
@@ -1792,9 +1798,9 @@ def test_rank_apply(self):
         lab1 = np.random.randint(0, 100, size=500)
         lab2 = np.random.randint(0, 130, size=500)

-        df = DataFrame({'value' : np.random.randn(500),
-                        'key1' : lev1.take(lab1),
-                        'key2' : lev2.take(lab2)})
+        df = DataFrame({'value': np.random.randn(500),
+                        'key1': lev1.take(lab1),
+                        'key2': lev2.take(lab2)})

         result = df.groupby(['key1', 'key2']).value.rank()

@@ -1808,7 +1814,7 @@ def test_rank_apply(self):

     def test_dont_clobber_name_column(self):
         df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],
-                        'name' : ['foo', 'bar', 'baz'] * 2})
+                        'name': ['foo', 'bar', 'baz'] * 2})

         result = df.groupby('key').apply(lambda x: x)
         assert_frame_equal(result, df)
@@ -1848,6 +1854,7 @@ def test_no_nonsense_name(self):

     def test_wrap_agg_out(self):
         grouped = self.three_group.groupby(['A', 'B'])
+
         def func(ser):
             if ser.dtype == np.object:
                 raise TypeError
@@ -1860,12 +1867,12 @@ def func(ser):

     def test_multifunc_sum_bug(self):
         # GH #1065
-        x = DataFrame(np.arange(9).reshape(3,3))
-        x['test']=0
-        x['fl']= [1.3,1.5,1.6]
+        x = DataFrame(np.arange(9).reshape(3, 3))
+        x['test'] = 0
+        x['fl'] = [1.3, 1.5, 1.6]

         grouped = x.groupby('test')
-        result = grouped.agg({'fl':'sum',2:'size'})
+        result = grouped.agg({'fl': 'sum', 2: 'size'})
         self.assert_(result['fl'].dtype == np.float64)

     def test_handle_dict_return_value(self):
@@ -1934,33 +1941,35 @@ def test_more_flexible_frame_multi_function(self):

         grouped = self.df.groupby('A')

-        exmean = grouped.agg(OrderedDict([['C' , np.mean], ['D' , np.mean]]))
-        exstd = grouped.agg(OrderedDict([['C' , np.std], ['D' , np.std]]))
+        exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]]))
+        exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]]))

         expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1)
         expected = expected.swaplevel(0, 1, axis=1).sortlevel(0, axis=1)

-        d=OrderedDict([['C',[np.mean, np.std]],['D',[np.mean, np.std]]])
+        d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]])
         result = grouped.aggregate(d)

         assert_frame_equal(result, expected)

         # be careful
-        result = grouped.aggregate(OrderedDict([['C' , np.mean],
-                                                [ 'D' , [np.mean, np.std]]]))
-        expected = grouped.aggregate(OrderedDict([['C' , np.mean],
-                                                  [ 'D' , [np.mean, np.std]]]))
+        result = grouped.aggregate(OrderedDict([['C', np.mean],
+                                                ['D', [np.mean, np.std]]]))
+        expected = grouped.aggregate(OrderedDict([['C', np.mean],
+                                                  ['D', [np.mean, np.std]]]))

         assert_frame_equal(result, expected)

+        def foo(x):
+            return np.mean(x)

-        def foo(x): return np.mean(x)
-        def bar(x): return np.std(x, ddof=1)
-        d=OrderedDict([['C' , np.mean],
-                       ['D', OrderedDict([['foo', np.mean],
-                                          ['bar', np.std]])]])
+        def bar(x):
+            return np.std(x, ddof=1)
+        d = OrderedDict([['C', np.mean],
+                         ['D', OrderedDict([['foo', np.mean],
+                                            ['bar', np.std]])]])
         result = grouped.aggregate(d)

-        d = OrderedDict([['C' , [np.mean]],['D' , [foo, bar]]])
+        d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
         expected = grouped.aggregate(d)

         assert_frame_equal(result, expected)
@@ -1970,18 +1979,21 @@ def test_multi_function_flexible_mix(self):
         from pandas.util.compat import OrderedDict

         grouped = self.df.groupby('A')
-        d = OrderedDict([['C' , OrderedDict([['foo' , 'mean'],
-                                             ['bar' , 'std']])],
-                         ['D' , 'sum']])
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'],
+                                            [
+                                            'bar', 'std']])],
+                         ['D', 'sum']])
         result = grouped.aggregate(d)
-        d2 = OrderedDict([['C' , OrderedDict([['foo' , 'mean'],
-                                              ['bar' , 'std']])],
-                          ['D' ,[ 'sum']]])
+        d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
+                                             [
+                                             'bar', 'std']])],
+                          ['D', ['sum']]])
         result2 = grouped.aggregate(d2)
-        d3 = OrderedDict([['C' , OrderedDict([['foo' , 'mean'],
-                                              ['bar' , 'std']])],
-                          ['D' , {'sum':'sum'}]])
+        d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
+                                             [
+                                             'bar', 'std']])],
+                          ['D', {'sum': 'sum'}]])
         expected = grouped.aggregate(d3)

         assert_frame_equal(result, expected)
@@ -2067,7 +2079,8 @@ def test_groupby_reindex_inside_function(self):

         periods = 1000
         ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
-        df = DataFrame({'high': np.arange(periods), 'low': np.arange(periods)}, index=ind)
+        df = DataFrame({'high': np.arange(
+            periods), 'low': np.arange(periods)}, index=ind)

         def agg_before(hour, func, fix=False):
             """
@@ -2175,6 +2188,7 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     for k, v in expected.iteritems():
         assert(result[k] == v)

+
def test_decons():
     from pandas.core.groupby import decons_group_index, get_group_index
@@ -2199,5 +2213,6 @@ def testit(label_list, shape):

 if __name__ == '__main__':
     import nose
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure', '-s'],
-                   exit=False)
+    nose.runmodule(
+        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'],
+        exit=False)
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index cac895ed31b37..0bf41b485d278 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -24,8 +24,10 @@
 import pandas as pd
 from pandas.lib import Timestamp

+
 class TestIndex(unittest.TestCase):
     _multiprocess_can_split_ = True
+
     def setUp(self):
         self.unicodeIndex = tm.makeUnicodeIndex(100)
         self.strIndex = tm.makeStringIndex(100)
@@ -81,7 +83,6 @@ def test_constructor(self):
         # arr = np.array(5.)
         # self.assertRaises(Exception, arr.view, Index)

-
     def test_constructor_corner(self):
         # corner case
         self.assertRaises(Exception, Index, 0)
@@ -182,7 +183,7 @@ def test_booleanindex(self):
             self.assertEqual(subIndex.get_loc(val), i)

     def test_fancy(self):
-        sl = self.strIndex[[1,2,3]]
+        sl = self.strIndex[[1, 2, 3]]
         for i in sl:
             self.assertEqual(i, sl[sl.get_loc(i)])
@@ -447,19 +448,20 @@ def test_drop(self):
         expected = self.strIndex[1:]
         self.assert_(dropped.equals(expected))

-        ser = Index([1,2,3])
+        ser = Index([1, 2, 3])
         dropped = ser.drop(1)
-        expected = Index([2,3])
+        expected = Index([2, 3])
         self.assert_(dropped.equals(expected))

     def test_tuple_union_bug(self):
         import pandas
         import numpy as np

-        aidx1 = np.array([(1, 'A'),(2, 'A'),(1, 'B'),(2, 'B')], dtype=[('num',
-            int),('let', 'a1')])
-        aidx2 = np.array([(1, 'A'),(2, 'A'),(1, 'B'),(2, 'B'),(1,'C'),(2,
-            'C')], dtype=[('num', int),('let', 'a1')])
+        aidx1 = np.array(
+            [(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num',
+                                                              int), ('let', 'a1')])
+        aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B'), (1, 'C'), (2,
+                                                                             'C')], dtype=[('num', int), ('let', 'a1')])

         idx1 = pandas.Index(aidx1)
         idx2 = pandas.Index(aidx2)
@@ -467,7 +469,7 @@ def test_tuple_union_bug(self):

         # intersection broken?
         int_idx = idx1.intersection(idx2)
         # needs to be 1d like idx1 and idx2
-        expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
+        expected = idx1[:4]  # pandas.Index(sorted(set(idx1) & set(idx2)))
         self.assert_(int_idx.ndim == 1)
         self.assert_(int_idx.equals(expected))
@@ -506,7 +508,7 @@ def test_isin(self):
         self.assert_(result.dtype == np.bool_)

     def test_boolean_cmp(self):
-        values = [1,2,3,4]
+        values = [1, 2, 3, 4]

         idx = Index(values)
         res = (idx == values)
@@ -515,8 +517,10 @@ def test_boolean_cmp(self):
         self.assert_(res.dtype == 'bool')
         self.assert_(not isinstance(res, Index))

+
 class TestInt64Index(unittest.TestCase):
     _multiprocess_can_split_ = True
+
     def setUp(self):
         self.index = Int64Index(np.arange(0, 20, 2))

@@ -804,7 +808,7 @@ def test_intersection(self):
             self.assert_(np.array_equal(result, expected))

     def test_intersect_str_dates(self):
-        dt_dates = [datetime(2012,2,9) , datetime(2012,2,22)]
+        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

         i1 = Index(dt_dates, dtype=object)
         i2 = Index(['aa'], dtype=object)
@@ -842,8 +846,8 @@ def test_prevent_casting(self):
         self.assert_(result.dtype == np.object_)

     def test_take_preserve_name(self):
-        index = Int64Index([1,2,3,4], name='foo')
-        taken = index.take([3,0,1])
+        index = Int64Index([1, 2, 3, 4], name='foo')
+        taken = index.take([3, 0, 1])
         self.assertEqual(index.name, taken.name)

     def test_int_name_format(self):
@@ -855,13 +859,14 @@ def test_int_name_format(self):
         repr(df)

     def test_print_unicode_columns(self):
-        df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]})
-        repr(df.columns) # should not raise UnicodeDecodeError
+        df = pd.DataFrame(
+            {u"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]})
+        repr(df.columns)  # should not raise UnicodeDecodeError

     def test_repr_summary(self):
         r = repr(pd.Index(np.arange(10000)))
         self.assertTrue(len(r) < 100)
-        self.assertTrue( "..." in r)
+        self.assertTrue("..." in r)

     def test_unicode_string_with_unicode(self):
         idx = Index(range(1000))
@@ -878,8 +883,10 @@ def test_bytestring_with_unicode(self):
         else:
             str(idx)

+
 class TestMultiIndex(unittest.TestCase):
     _multiprocess_can_split_ = True
+
     def setUp(self):
         major_axis = Index(['foo', 'bar', 'baz', 'qux'])
         minor_axis = Index(['one', 'two'])
@@ -1078,7 +1085,7 @@ def test_getitem(self):

         # slice
         result = self.index[2:5]
-        expected = self.index[[2,3,4]]
+        expected = self.index[[2, 3, 4]]
         self.assert_(result.equals(expected))

         # boolean
@@ -1277,7 +1284,7 @@ def test_get_indexer(self):
         index = MultiIndex(levels=[major_axis, minor_axis],
                            labels=[major_labels, minor_labels])
         idx1 = index[:5]
-        idx2 = index[[1,3,5]]
+        idx2 = index[[1, 3, 5]]

         r1 = idx1.get_indexer(idx2)
         assert_almost_equal(r1, [1, 3, -1])
@@ -1299,8 +1306,8 @@ def test_get_indexer(self):
         rexp1 = idx1.get_indexer(idx2)
         assert_almost_equal(r1, rexp1)

-        r1 = idx1.get_indexer([1,2,3])
-        self.assert_( (r1 == [-1, -1, -1]).all() )
+        r1 = idx1.get_indexer([1, 2, 3])
+        self.assert_((r1 == [-1, -1, -1]).all())

         # self.assertRaises(Exception, idx1.get_indexer,
         #                   list(list(zip(*idx2._tuple_index))[0]))
@@ -1491,7 +1498,7 @@ def test_diff(self):

     def test_from_tuples(self):
         self.assertRaises(Exception, MultiIndex.from_tuples, [])
-        idx = MultiIndex.from_tuples( ((1,2),(3,4)), names=['a', 'b'] )
+        idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
         self.assertEquals(len(idx), 2)

     def test_argsort(self):
@@ -1543,7 +1550,6 @@ def test_sortlevel_deterministic(self):
         sorted_idx, _ = index.sortlevel(1, ascending=False)
         self.assert_(sorted_idx.equals(expected[::-1]))

-
     def test_dims(self):
         pass

@@ -1620,7 +1626,7 @@ def test_insert(self):
         self.assertRaises(Exception, self.index.insert, 0, ('foo2',))

     def test_take_preserve_name(self):
-        taken = self.index.take([3,0,1])
+        taken = self.index.take([3, 0, 1])
         self.assertEqual(taken.names, self.index.names)

     def test_join_level(self):
@@ -1634,7 +1640,8 @@ def _check_how(other, how):
             self.assert_(join_index.levels[1].equals(exp_level))

             # pare down levels
-            mask = np.array([x[1] in exp_level for x in self.index], dtype=bool)
+            mask = np.array(
+                [x[1] in exp_level for x in self.index], dtype=bool)
             exp_values = self.index.values[mask]
             self.assert_(np.array_equal(join_index.values, exp_values))
@@ -1711,13 +1718,13 @@ def test_tolist(self):
         self.assertEqual(result, exp)

     def test_repr_with_unicode_data(self):
-        d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]}
-        index=pd.DataFrame(d).set_index(["a","b"]).index
-        self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped
+        d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
+        index = pd.DataFrame(d).set_index(["a", "b"]).index
+        self.assertFalse("\\u" in repr(index))  # we don't want unicode-escaped

     def test_unicode_string_with_unicode(self):
-        d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]}
-        idx=pd.DataFrame(d).set_index(["a","b"]).index
+        d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
+        idx = pd.DataFrame(d).set_index(["a", "b"]).index

         if py3compat.PY3:
             str(idx)
@@ -1725,14 +1732,15 @@ def test_unicode_string_with_unicode(self):
             unicode(idx)

     def test_bytestring_with_unicode(self):
-        d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]}
-        idx=pd.DataFrame(d).set_index(["a","b"]).index
+        d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
+        idx = pd.DataFrame(d).set_index(["a", "b"]).index

         if py3compat.PY3:
             bytes(idx)
         else:
             str(idx)

+
def test_get_combined_index():
     from pandas.core.index import _get_combined_index
     result = _get_combined_index([])
@@ -1740,6 +1748,6 @@ def test_get_combined_index():

 if __name__ == '__main__':
     import nose
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
-                   # '--with-coverage', '--cover-package=pandas.core'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   # '--with-coverage', '--cover-package=pandas.core'],
                    exit=False)
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index 3d9448ecdd7c5..9deddb802d1bf 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -9,7 +9,9 @@
 import pandas.core.internals as internals
 import pandas.util.testing as tm

-from pandas.util.testing import (assert_almost_equal, assert_frame_equal, randn)
+from pandas.util.testing import (
+    assert_almost_equal, assert_frame_equal, randn)
+

 def assert_block_equal(left, right):
     assert_almost_equal(left.values, right.values)
@@ -17,42 +19,51 @@ def assert_block_equal(left, right):
     assert(left.items.equals(right.items))
     assert(left.ref_items.equals(right.ref_items))

+
 def get_float_mat(n, k):
     return np.repeat(np.atleast_2d(np.arange(k, dtype=float)), n, axis=0)

 TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
 N = 10

+
 def get_float_ex(cols=['a', 'c', 'e']):
     floats = get_float_mat(N, len(cols)).T
     return make_block(floats, cols, TEST_COLS)

+
 def get_complex_ex(cols=['h']):
     complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex128)
     return make_block(complexes, cols, TEST_COLS)

+
 def get_obj_ex(cols=['b', 'd']):
     mat = np.empty((N, 2), dtype=object)
     mat[:, 0] = 'foo'
     mat[:, 1] = 'bar'
     return make_block(mat.T, cols, TEST_COLS)

+
 def get_bool_ex(cols=['f']):
     mat = np.ones((N, 1), dtype=bool)
     return make_block(mat.T, cols, TEST_COLS)

+
 def get_int_ex(cols=['g']):
     mat = randn(N, 1).astype(int)
     return make_block(mat.T, cols, TEST_COLS)

+
 def get_int32_ex(cols):
     mat = randn(N, 1).astype(np.int32)
     return make_block(mat.T, cols, TEST_COLS)

+
 def get_dt_ex(cols=['h']):
     mat = randn(N, 1).astype(int).astype('M8[ns]')
     return make_block(mat.T, cols, TEST_COLS)

+
 class TestBlock(unittest.TestCase):
     _multiprocess_can_split_ = True
@@ -158,21 +169,21 @@ def test_delete(self):

     def test_split_block_at(self):
         bs = list(self.fblock.split_block_at('a'))
-        self.assertEqual(len(bs),1)
+        self.assertEqual(len(bs), 1)
         self.assertTrue(np.array_equal(bs[0].items, ['c', 'e']))

         bs = list(self.fblock.split_block_at('c'))
-        self.assertEqual(len(bs),2)
+        self.assertEqual(len(bs), 2)
         self.assertTrue(np.array_equal(bs[0].items, ['a']))
         self.assertTrue(np.array_equal(bs[1].items, ['e']))

         bs = list(self.fblock.split_block_at('e'))
-        self.assertEqual(len(bs),1)
+        self.assertEqual(len(bs), 1)
         self.assertTrue(np.array_equal(bs[0].items, ['a', 'c']))

         bblock = get_bool_ex(['f'])
         bs = list(bblock.split_block_at('f'))
-        self.assertEqual(len(bs),0)
+        self.assertEqual(len(bs), 0)

     def test_unicode_repr(self):
         mat = np.empty((N, 2), dtype=object)
@@ -229,7 +240,7 @@ def test_is_mixed_dtype(self):
         for b in blocks:
             b.ref_items = items

-        mgr = BlockManager(blocks, [items,  np.arange(N)])
+        mgr = BlockManager(blocks, [items, np.arange(N)])
         self.assert_(not mgr.is_mixed_dtype())

     def test_is_indexed_like(self):
@@ -273,8 +284,8 @@ def test_pickle(self):
         self.assert_(mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items)

         # GH2431
-        self.assertTrue(hasattr(mgr2,"_is_consolidated"))
-        self.assertTrue(hasattr(mgr2,"_known_consolidated"))
+        self.assertTrue(hasattr(mgr2, "_is_consolidated"))
+        self.assertTrue(hasattr(mgr2, "_known_consolidated"))

         # reset to False on load
         self.assertFalse(mgr2._is_consolidated)
@@ -403,17 +414,17 @@ def test_get_numeric_data(self):
         bool_ser = Series(np.array([True, False, True]))
         obj_ser = Series(np.array([1, 'a', 5]))
         dt_ser = Series(tm.makeDateIndex(3))
-        #check types
-        df = DataFrame({'int' : int_ser, 'float' : float_ser,
-                        'complex' : complex_ser, 'str' : str_ser,
-                        'bool' : bool_ser, 'obj' : obj_ser,
-                        'dt' : dt_ser})
-        xp = DataFrame({'int' : int_ser, 'float' : float_ser,
-                        'complex' : complex_ser})
+        # check types
+        df = DataFrame({'int': int_ser, 'float': float_ser,
+                        'complex': complex_ser, 'str': str_ser,
+                        'bool': bool_ser, 'obj': obj_ser,
+                        'dt': dt_ser})
+        xp = DataFrame({'int': int_ser, 'float': float_ser,
+                        'complex': complex_ser})
         rs = DataFrame(df._data.get_numeric_data())
         assert_frame_equal(xp, rs)

-        xp = DataFrame({'bool' : bool_ser})
+        xp = DataFrame({'bool': bool_ser})
         rs = DataFrame(df._data.get_numeric_data(type_list=bool))
         assert_frame_equal(xp, rs)

@@ -428,16 +439,16 @@ def test_get_numeric_data(self):
         self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool'])

     def test_missing_unicode_key(self):
-        df=DataFrame({"a":[1]})
+        df = DataFrame({"a": [1]})
         try:
-            df.ix[:,u"\u05d0"] # should not raise UnicodeEncodeError
+            df.ix[:, u"\u05d0"]  # should not raise UnicodeEncodeError
         except KeyError:
-            pass # this is the expected exception
+            pass  # this is the expected exception

 if __name__ == '__main__':
     # unittest.main()
     import nose
     # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
     #                exit=False)
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index d1b139e034cdf..5b2f8fc259c09 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -19,6 +19,7 @@
 import pandas.index as _index

+
 class TestMultiLevel(unittest.TestCase):
     _multiprocess_can_split_ = True

@@ -121,7 +122,8 @@ def _check_op(opname):
             # Series
             op = getattr(Series, opname)
             result = op(self.ymd['A'], month_sums['A'], level='month')
-            broadcasted = self.ymd['A'].groupby(level='month').transform(np.sum)
+            broadcasted = self.ymd['A'].groupby(
+                level='month').transform(np.sum)
             expected = op(self.ymd['A'], broadcasted)
             assert_series_equal(result, expected)

@@ -132,6 +134,7 @@ def _check_op(opname):

     def test_pickle(self):
         import cPickle
+
         def _test_roundtrip(frame):
             pickled = cPickle.dumps(frame)
             unpickled = cPickle.loads(pickled)
@@ -256,18 +259,18 @@ def test_frame_getitem_setitem_slice(self):
         self.assert_((cp.values[4:] != 0).all())

     def test_frame_getitem_setitem_multislice(self):
-        levels = [['t1', 't2'], ['a','b','c']]
-        labels = [[0,0,0,1,1], [0,1,2,0,1]]
+        levels = [['t1', 't2'], ['a', 'b', 'c']]
+        labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
         midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id'])
-        df = DataFrame({'value':[1,2,3,7,8]}, index=midx)
+        df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)

-        result = df.ix[:,'value']
+        result = df.ix[:, 'value']
         assert_series_equal(df['value'], result)

-        result = df.ix[1:3,'value']
+        result = df.ix[1:3, 'value']
         assert_series_equal(df['value'][1:3], result)

-        result = df.ix[:,:]
+        result = df.ix[:, :]
         assert_frame_equal(df, result)

         result = df
@@ -275,18 +278,18 @@ def test_frame_getitem_setitem_multislice(self):
         result['value'] = 10
         assert_frame_equal(df, result)

-        df.ix[:,:] = 10
+        df.ix[:, :] = 10
         assert_frame_equal(df, result)

     def test_frame_getitem_multicolumn_empty_level(self):
-        f = DataFrame({'a': ['1','2','3'],
-                       'b': ['2','3','4']})
+        f = DataFrame({'a': ['1', '2', '3'],
+                       'b': ['2', '3', '4']})
         f.columns = [['level1 item1', 'level1 item2'],
                      ['', 'level2 item2'],
                      ['level3 item1', 'level3 item2']]

         result = f['level1 item1']
-        expected = DataFrame([['1'],['2'],['3']], index=f.index,
+        expected = DataFrame([['1'], ['2'], ['3']], index=f.index,
                              columns=['level3 item1'])
         assert_frame_equal(result, expected)
@@ -309,7 +312,7 @@ def test_frame_setitem_multi_column(self):
         df = DataFrame(index=[1, 3, 5], columns=columns)

         # Works, but adds a column instead of updating the two existing ones
-        df['A'] = 0.0 # Doesn't work
+        df['A'] = 0.0  # Doesn't work
         self.assertTrue((df['A'].values == 0).all())

         # it broadcasts
@@ -320,10 +323,10 @@ def test_frame_setitem_multi_column(self):

     def test_getitem_tuple_plus_slice(self):
         # GH #671
-        df = DataFrame({'a' : range(10),
-                        'b' : range(10),
-                        'c' : np.random.randn(10),
-                        'd' : np.random.randn(10)})
+        df = DataFrame({'a': range(10),
+                        'b': range(10),
+                        'c': np.random.randn(10),
+                        'd': np.random.randn(10)})

         idf = df.set_index(['a', 'b'])

@@ -345,7 +348,7 @@ def test_getitem_setitem_tuple_plus_columns(self):

     def test_getitem_multilevel_index_tuple_unsorted(self):
         index_columns = list("abc")
-        df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]] ,
+        df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]],
                        columns=index_columns + ["data"])
         df = df.set_index(index_columns)
         query_index = df.index[:1]
@@ -413,7 +416,7 @@ def test_xs_level_multiple(self):
         expected = df.xs('a').xs(4, level='four')
         assert_frame_equal(result, expected)

-        #GH2107
+        # GH2107
         dates = range(20111201, 20111205)
         ids = 'abcde'
         idx = MultiIndex.from_tuples([x for x in cart_product(dates, ids)])
@@ -489,8 +492,8 @@ def test_getitem_setitem_slice_integers(self):
                            labels=[[0, 0, 1, 1, 2, 2],
                                    [0, 1, 0, 1, 0, 1]])

-        frame = DataFrame(np.random.randn(len(index), 4), index=index,
-                           columns=['a', 'b', 'c', 'd'])
+        frame = DataFrame(np.random.randn(len(index), 4), index=index,
+                          columns=['a', 'b', 'c', 'd'])
         res = frame.ix[1:2]
         exp = frame.reindex(frame.index[2:])
         assert_frame_equal(res, exp)

@@ -498,7 +501,7 @@ def test_getitem_setitem_slice_integers(self):
         frame.ix[1:2] = 7
         self.assert_((frame.ix[1:2] == 7).values.all())

-        series = Series(np.random.randn(len(index)),  index=index)
+        series = Series(np.random.randn(len(index)), index=index)

         res = series.ix[1:2]
         exp = series.reindex(series.index[2:])
@@ -568,15 +571,15 @@ def test_fancy_slice_partial(self):
         expected = self.frame[3:7]
         assert_frame_equal(result, expected)

-        result = self.ymd.ix[(2000,2):(2000,4)]
+        result = self.ymd.ix[(2000, 2):(2000, 4)]
         lev = self.ymd.index.labels[1]
         expected = self.ymd[(lev >= 1) & (lev <= 3)]
         assert_frame_equal(result, expected)

     def test_getitem_partial_column_select(self):
-        idx = MultiIndex(labels=[[0,0,0],[0,1,1],[1,0,1]],
-                         levels=[['a','b'],['x','y'],['p','q']])
-        df = DataFrame(np.random.rand(3,2),index=idx)
+        idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
+                         levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
+        df = DataFrame(np.random.rand(3, 2), index=idx)

         result = df.ix[('a', 'y'), :]
         expected = df.ix[('a', 'y')]
@@ -604,32 +607,32 @@ def test_sortlevel(self):
         # preserve names
         self.assertEquals(a_sorted.index.names, self.frame.index.names)

-        #inplace
+        # inplace
         rs = self.frame.copy()
         rs.sortlevel(0, inplace=True)
         assert_frame_equal(rs, self.frame.sortlevel(0))

-
     def test_delevel_infer_dtype(self):
         tuples = [tuple for tuple in cart_product(['foo', 'bar'],
                                                   [10, 20], [1.0, 1.1])]
         index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
-        df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'],
+        df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                        index=index)
         deleveled = df.reset_index()
         self.assert_(com.is_integer_dtype(deleveled['prm1']))
         self.assert_(com.is_float_dtype(deleveled['prm2']))

     def test_reset_index_with_drop(self):
-        deleveled = self.ymd.reset_index(drop = True)
+        deleveled = self.ymd.reset_index(drop=True)
         self.assertEquals(len(deleveled.columns), len(self.ymd.columns))

         deleveled = self.series.reset_index()
         self.assert_(isinstance(deleveled, DataFrame))
-        self.assert_(len(deleveled.columns) == len(self.series.index.levels)+1)
+        self.assert_(
+            len(deleveled.columns) == len(self.series.index.levels) + 1)

-        deleveled = self.series.reset_index(drop = True)
+        deleveled = self.series.reset_index(drop=True)
         self.assert_(isinstance(deleveled, Series))

     def test_sortlevel_by_name(self):
@@ -814,14 +817,14 @@ def test_stack_mixed_dtype(self):
         self.assert_(stacked['bar'].dtype == np.float_)

     def test_unstack_bug(self):
-        df = DataFrame({'state': ['naive','naive','naive',
-                                  'activ','activ','activ'],
-                        'exp':['a','b','b','b','a','a'],
-                        'barcode':[1,2,3,4,1,3],
-                        'v':['hi','hi','bye','bye','bye','peace'],
+        df = DataFrame({'state': ['naive', 'naive', 'naive',
+                                  'activ', 'activ', 'activ'],
+                        'exp': ['a', 'b', 'b', 'b', 'a', 'a'],
+                        'barcode': [1, 2, 3, 4, 1, 3],
+                        'v': ['hi', 'hi', 'bye', 'bye', 'bye', 'peace'],
                         'extra': np.arange(6.)})

-        result = df.groupby(['state','exp','barcode','v']).apply(len)
+        result = df.groupby(['state', 'exp', 'barcode', 'v']).apply(len)

         unstacked = result.unstack()
         restacked = unstacked.stack()
@@ -894,12 +897,12 @@ def test_unstack_sparse_keyspace(self):
         # Generate Long File & Test Pivot
         NUM_ROWS = 1000

-        df = DataFrame({'A' : np.random.randint(100, size=NUM_ROWS),
-                        'B' : np.random.randint(300, size=NUM_ROWS),
-                        'C' : np.random.randint(-7, 7, size=NUM_ROWS),
-                        'D' : np.random.randint(-19,19, size=NUM_ROWS),
-                        'E' : np.random.randint(3000, size=NUM_ROWS),
-                        'F' : np.random.randn(NUM_ROWS)})
+        df = DataFrame({'A': np.random.randint(100, size=NUM_ROWS),
+                        'B': np.random.randint(300, size=NUM_ROWS),
+                        'C': np.random.randint(-7, 7, size=NUM_ROWS),
+                        'D': np.random.randint(-19, 19, size=NUM_ROWS),
+                        'E': np.random.randint(3000, size=NUM_ROWS),
+                        'F': np.random.randn(NUM_ROWS)})

         idf = df.set_index(['A', 'B', 'C', 'D', 'E'])

@@ -922,19 +925,20 @@ def test_unstack_unobserved_keys(self):
         assert_frame_equal(recons, df)

     def test_groupby_corner(self):
-        midx = MultiIndex(levels=[['foo'],['bar'],['baz']],
-                          labels=[[0],[0],[0]], names=['one','two','three'])
-        df = DataFrame([np.random.rand(4)], columns=['a','b','c','d'],
+        midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']],
+                          labels=[[0], [0], [0]], names=['one', 'two', 'three'])
+        df = DataFrame([np.random.rand(4)], columns=['a', 'b', 'c', 'd'],
                        index=midx)
         # should work
         df.groupby(level='three')

     def test_groupby_level_no_obs(self):
         # #1697
-        midx = MultiIndex.from_tuples([('f1', 's1'),('f1','s2'),
-                                       ('f2', 's1'),('f2', 's2'),
-                                       ('f3', 's1'),('f3','s2')])
-        df = DataFrame([[1,2,3,4,5,6],[7,8,9,10,11,12]], columns= midx)
+        midx = MultiIndex.from_tuples([('f1', 's1'), ('f1', 's2'),
+                                       ('f2', 's1'), ('f2', 's2'),
+                                       ('f3', 's1'), ('f3', 's2')])
+        df = DataFrame(
+            [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx)

         df1 = df.select(lambda u: u[0] in ['f2', 'f3'], axis=1)
         grouped = df1.groupby(axis=1, level=0)
@@ -970,8 +974,8 @@ def test_swaplevel(self):
         assert_frame_equal(swapped, exp)

     def test_swaplevel_panel(self):
-        panel = Panel({'ItemA' : self.frame,
-                       'ItemB' : self.frame * 2})
+        panel = Panel({'ItemA': self.frame,
+                       'ItemB': self.frame * 2})

         result = panel.swaplevel(0, 1, axis='major')
         expected = panel.copy()
@@ -1001,11 +1005,11 @@ def test_insert_index(self):
         self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all())

     def test_alignment(self):
-        x = Series(data=[1,2,3],
-                   index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B",3)]))
+        x = Series(data=[1, 2, 3],
+                   index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)]))

-        y = Series(data=[4,5,6],
-                   index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B",3)]))
+        y = Series(data=[4, 5, 6],
+                   index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)]))

         res = x - y
         exp_index = x.index.union(y.index)
@@ -1071,7 +1075,7 @@ def test_frame_getitem_not_sorted(self):

     def test_series_getitem_not_sorted(self):
         arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
-        ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
+                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
         tuples = zip(*arrays)
         index = MultiIndex.from_tuples(tuples)
         s = Series(randn(8), index=index)
@@ -1140,6 +1144,7 @@ def test_frame_group_ops(self):

                 grouped = frame.groupby(level=level, axis=axis)
                 pieces = []
+
                 def aggf(x):
                     pieces.append(x)
                     return getattr(x, op)(skipna=skipna, axis=axis)
@@ -1163,9 +1168,11 @@ def test_stat_op_corner(self):
         assert_series_equal(result, expected)

     def test_frame_any_all_group(self):
-        df = DataFrame({'data': [False, False, True, False, True, False, True]},
-                       index=[['one', 'one', 'two', 'one', 'two', 'two', 'two'],
-                              [0, 1, 0, 2, 1, 2, 3]])
+        df = DataFrame(
+            {'data': [False, False, True, False, True, False, True]},
+            index=[
+                ['one', 'one', 'two', 'one', 'two', 'two', 'two'],
+                [0, 1, 0, 2, 1, 2, 3]])

         result = df.any(level=0)
         ex = DataFrame({'data': [False, True]}, index=['one', 'two'])
@@ -1192,7 +1199,6 @@ def test_std_var_pass_ddof(self):
             expected = df.groupby(level=0).agg(alt)
             assert_frame_equal(result, expected)

-
     def test_frame_series_agg_multiple_levels(self):
         result = self.ymd.sum(level=['year', 'month'])
         expected = self.ymd.groupby(level=['year', 'month']).sum()
@@ -1395,65 +1401,65 @@ def test_int_series_slicing(self):
         assert_frame_equal(result, expected)

     def test_mixed_depth_get(self):
-        arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
-                  [ '', 'OD', 'OD', 'result1', 'result2', 'result1'],
-                  [ '', 'wx', 'wy', '', '', '']]
+        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
+                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
+                  ['', 'wx', 'wy', '', '', '']]

         tuples = zip(*arrays)
         tuples.sort()
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4,6),columns = index)
+        df = DataFrame(randn(4, 6), columns=index)

         result = df['a']
-        expected = df['a','','']
+        expected = df['a', '', '']
         assert_series_equal(result, expected)
         self.assertEquals(result.name, 'a')

-        result = df['routine1','result1']
-        expected = df['routine1','result1','']
+        result = df['routine1', 'result1']
+        expected = df['routine1', 'result1', '']
         assert_series_equal(result, expected)
         self.assertEquals(result.name, ('routine1', 'result1'))

     def test_mixed_depth_insert(self):
-        arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
-                  [ '', 'OD', 'OD', 'result1', 'result2', 'result1'],
-                  [ '', 'wx', 'wy', '', '', '']]
+        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
+                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
+                  ['', 'wx', 'wy', '', '', '']]

         tuples = zip(*arrays)
         tuples.sort()
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4,6),columns = index)
+        df = DataFrame(randn(4, 6), columns=index)

         result = df.copy()
         expected = df.copy()
-        result['b'] = [1,2,3,4]
-        expected['b','',''] = [1,2,3,4]
+        result['b'] = [1, 2, 3, 4]
+        expected['b', '', ''] = [1, 2, 3, 4]
         assert_frame_equal(result, expected)

     def test_mixed_depth_drop(self):
-        arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
-                  [ '', 'OD', 'OD', 'result1', 'result2', 'result1'],
-                  [ '', 'wx', 'wy', '', '', '']]
+        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
+                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
+                  ['', 'wx', 'wy', '', '', '']]

         tuples = zip(*arrays)
         tuples.sort()
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4,6),columns = index)
+        df = DataFrame(randn(4, 6), columns=index)

-        result = df.drop('a',axis=1)
-        expected = df.drop([('a','','')],axis=1)
+        result = df.drop('a', axis=1)
+        expected = df.drop([('a', '', '')], axis=1)
         assert_frame_equal(expected, result)

-        result = df.drop(['top'],axis=1)
-        expected = df.drop([('top','OD','wx')], axis=1)
-        expected = expected.drop([('top','OD','wy')], axis=1)
+        result = df.drop(['top'], axis=1)
+        expected = df.drop([('top', 'OD', 'wx')], axis=1)
+        expected = expected.drop([('top', 'OD', 'wy')], axis=1)
         assert_frame_equal(expected, result)

         result = df.drop(('top', 'OD', 'wx'), axis=1)
-        expected = df.drop([('top','OD','wx')], axis=1)
+        expected = df.drop([('top', 'OD', 'wx')], axis=1)
         assert_frame_equal(expected, result)

-        expected = df.drop([('top','OD','wy')], axis=1)
+        expected = df.drop([('top', 'OD', 'wy')], axis=1)
         expected = df.drop('top', axis=1)

         result = df.drop('result1', level=1, axis=1)
@@ -1462,11 +1468,11 @@ def test_mixed_depth_drop(self):
         assert_frame_equal(expected, result)

     def test_drop_nonunique(self):
-        df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2],
+        df = DataFrame([["x-a", "x", "a", 1.5], ["x-a", "x", "a", 1.2],
                         ["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1],
-                        ["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1],
+                        ["x-b", "x", "b", 5.1], ["x-b", "x", "b", 4.1],
                         ["x-b", "x", "b", 2.2],
-                        ["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]],
+                        ["y-a", "y", "a", 1.2], ["z-b", "z", "b", 2.1]],
                        columns=["var1", "var2", "var3", "var4"])

         grp_size = df.groupby("var1").size()
@@ -1483,25 +1489,25 @@ def test_drop_nonunique(self):
         assert_frame_equal(result, expected)

     def test_mixed_depth_pop(self):
-        arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
-                  [ '', 'OD', 'OD', 'result1', 'result2', 'result1'],
-                  [ '', 'wx', 'wy', '', '', '']]
+        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
+                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
+                  ['', 'wx', 'wy', '', '', '']]

         tuples = zip(*arrays)
         tuples.sort()
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4,6),columns = index)
+        df = DataFrame(randn(4, 6), columns=index)

         df1 = df.copy()
         df2 = df.copy()
         result = df1.pop('a')
-        expected = df2.pop(('a','',''))
+        expected = df2.pop(('a', '', ''))

         assert_series_equal(expected, result)
         assert_frame_equal(df1, df2)
-        self.assertEquals(result.name,'a')
+        self.assertEquals(result.name, 'a')

         expected = df1['top']
-        df1 = df1.drop(['top'],axis=1)
+        df1 = df1.drop(['top'], axis=1)
         result = df2.pop('top')
         assert_frame_equal(expected, result)
         assert_frame_equal(df1, df2)
@@ -1580,7 +1586,7 @@ def test_drop_preserve_names(self):
         self.assert_(result.index.names == ['one', 'two'])

     def test_unicode_repr_issues(self):
-        levels = [Index([u'a/\u03c3', u'b/\u03c3',u'c/\u03c3']),
+        levels = [Index([u'a/\u03c3', u'b/\u03c3', u'c/\u03c3']),
                   Index([0, 1])]
         labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
         index = MultiIndex(levels=levels, labels=labels)
@@ -1595,15 +1601,16 @@ def test_unicode_repr_level_names(self):
                                       names=[u'\u0394', 'i1'])

         s = Series(range(2), index=index)
-        df = DataFrame(np.random.randn(2,4), index=index)
+        df = DataFrame(np.random.randn(2, 4), index=index)
         repr(s)
         repr(df)

     def test_dataframe_insert_column_all_na(self):
         # GH #1534
-        mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c')])
-        df = DataFrame([[1,2],[3,4],[5,6]], index=mix)
-        s = Series({(1,1): 1, (1,2): 2})
+        mix = MultiIndex.from_tuples(
+            [('1a', '2a'), ('1a', '2b'), ('1a', '2c')])
+        df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
+        s = Series({(1, 1): 1, (1, 2): 2})
         df['new'] = s
         self.assert_(df['new'].isnull().all())
@@ -1628,20 +1635,21 @@ def test_set_column_scalar_with_ix(self):
         self.assert_((self.frame.ix[subset, 'B'] == 97).all())

     def test_frame_dict_constructor_empty_series(self):
-        s1 = Series([1,2,3, 4], index=MultiIndex.from_tuples([(1,2),(1,3),
-                                                              (2,2),(2,4)]))
-        s2 = Series([1,2,3,4],
-                    index=MultiIndex.from_tuples([(1,2),(1,3),(3,2),(3,4)]))
+        s1 = Series([1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3),
+                                                                (2, 2), (2, 4)]))
+        s2 = Series([1, 2, 3, 4],
+                    index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
         s3 = Series()

         # it works!
-        df = DataFrame({'foo':s1, 'bar':s2, 'baz':s3})
-        df = DataFrame.from_dict({'foo':s1, 'baz':s3, 'bar':s2})
+        df = DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
+        df = DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

     def test_indexing_ambiguity_bug_1678(self):
         columns = MultiIndex.from_tuples([('Ohio', 'Green'), ('Ohio', 'Red'),
                                           ('Colorado', 'Green')])
-        index = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)])
+        index = MultiIndex.from_tuples(
+            [('a', 1), ('a', 2), ('b', 1), ('b', 2)])

         frame = DataFrame(np.arange(12).reshape((4, 3)), index=index,
                           columns=columns)
@@ -1679,7 +1687,7 @@ def test_indexing_over_hashtable_size_cutoff(self):
         _index._SIZE_CUTOFF = old_cutoff

     def test_xs_mixed_no_copy(self):
-        index = MultiIndex.from_arrays([['a','a', 'b', 'b'], [1,2,1,2]],
+        index = MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                                        names=['first', 'second'])
         data = DataFrame(np.random.rand(len(index)), index=index,
                          columns=['A'])
@@ -1704,16 +1712,16 @@ def test_multiindex_na_repr(self):

     def test_assign_index_sequences(self):
         # #2200
-        df = DataFrame({"a":[1,2,3],
-                        "b":[4,5,6],
-                        "c":[7,8,9]}).set_index(["a","b"])
+        df = DataFrame({"a": [1, 2, 3],
+                        "b": [4, 5, 6],
+                        "c": [7, 8, 9]}).set_index(["a", "b"])
         l = list(df.index)
-        l[0]=("faz","boo")
+        l[0] = ("faz", "boo")
         df.index = l
         repr(df)

         # this travels an improper code path
-        l[0] = ["faz","boo"]
+        l[0] = ["faz", "boo"]
         df.index = l
         repr(df)

@@ -1732,5 +1740,5 @@ def test_tuples_have_na(self):
     import nose
     # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
     #                exit=False)
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_ndframe.py b/pandas/tests/test_ndframe.py
index c1fd16f5bc0b0..e017bf07039d7 100644
--- a/pandas/tests/test_ndframe.py
+++ b/pandas/tests/test_ndframe.py
@@ -5,6 +5,7 @@
 from pandas.core.generic import NDFrame
 import pandas.util.testing as t

+
 class TestNDFrame(unittest.TestCase):
     _multiprocess_can_split_ = True

@@ -27,5 +28,5 @@ def test_astype(self):

 if __name__ == '__main__':
     import nose
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 355da62b63c08..6e45bd9fd3049 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -22,6 +22,7 @@
 import pandas.core.panel as panelm
 import pandas.util.testing as tm

+
 def _skip_if_no_scipy():
     try:
         import scipy.stats
@@ -42,8 +43,10 @@ def test_cumsum(self):
         cumsum = self.panel.cumsum()
         assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())

+
 class SafeForLongAndSparse(object):
     _multiprocess_can_split_ = True
+
     def test_repr(self):
         foo = repr(self.panel)

@@ -82,6 +85,7 @@ def test_skew(self):
             from scipy.stats import skew
         except ImportError:
             raise nose.SkipTest
+
         def this_skew(x):
             if len(x) < 3:
                 return np.nan
@@ -149,8 +153,10 @@ def wrapper(x):

         self.assertRaises(Exception, f, axis=obj.ndim)

+
 class SafeForSparse(object):
     _multiprocess_can_split_ = True
+
     @classmethod
     def assert_panel_equal(cls, x, y):
         assert_panel_equal(x, y)
@@ -240,7 +246,7 @@ def test_arith(self):
         self._test_op(self.panel, lambda x, y: x - y)  # panel - 1
         self._test_op(self.panel, lambda x, y: x * y)  # panel * 1
         self._test_op(self.panel, lambda x, y: x / y)  # panel / 1
-        self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1
+        self._test_op(self.panel, lambda x, y: x ** y)  # panel ** 1

         self.assertRaises(Exception, self.panel.__add__, self.panel['ItemA'])
@@ -351,9 +357,11 @@ def test_abs(self):
             expected = np.abs(s)
             assert_series_equal(result, expected)

+
 class CheckIndexing(object):
     _multiprocess_can_split_ = True

+
     def test_getitem(self):
         self.assertRaises(Exception, self.panel.__getitem__, 'ItemQ')
@@ -427,15 +435,15 @@ def test_setitem(self):
     def test_setitem_ndarray(self):
         from pandas import date_range, datetools

-        timeidx = date_range(start=datetime(2009,1,1),
-                             end=datetime(2009,12,31),
+        timeidx = date_range(start=datetime(2009, 1, 1),
+                             end=datetime(2009, 12, 31),
                              freq=datetools.MonthEnd())
         lons_coarse = np.linspace(-177.5, 177.5, 72)
         lats_coarse = np.linspace(-87.5, 87.5, 36)
         P = Panel(items=timeidx, major_axis=lons_coarse,
                   minor_axis=lats_coarse)
-        data = np.random.randn(72*36).reshape((72,36))
-        key = datetime(2009,2,28)
+        data = np.random.randn(72 * 36).reshape((72, 36))
+        key = datetime(2009, 2, 28)
         P[key] = data

         assert_almost_equal(P[key].values, data)
@@ -588,9 +596,10 @@ def test_getitem_fancy_xs_check_view(self):
         self._check_view((NS, date, 'C'), comp)

     def test_ix_setitem_slice_dataframe(self):
-        a = Panel(items=[1,2,3],major_axis=[11,22,33],minor_axis=[111,222,333])
-        b = DataFrame(np.random.randn(2,3), index=[111,333],
-                      columns=[1,2,3])
+        a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33],
+                  minor_axis=[111, 222, 333])
+        b = DataFrame(np.random.randn(2, 3), index=[111, 333],
+                      columns=[1, 2, 3])

         a.ix[:, 22, [111, 333]] = b
@@ -643,14 +652,15 @@ def _check_view(self, indexer, comp):
         comp(cp.ix[indexer].reindex_like(obj), obj)

     def test_logical_with_nas(self):
-        d = Panel({ 'ItemA' : {'a': [np.nan, False] }, 'ItemB' : { 'a': [True, True] } })
+        d = Panel({'ItemA': {'a': [np.nan, False]}, 'ItemB': {
+                  'a': [True, True]}})

         result = d['ItemA'] | d['ItemB']
-        expected = DataFrame({ 'a' : [np.nan, True] })
+        expected = DataFrame({'a': [np.nan, True]})
         assert_frame_equal(result, expected)

         result = d['ItemA'].fillna(False) | d['ItemB']
-        expected = DataFrame({ 'a' : [True, True] }, dtype=object)
+        expected = DataFrame({'a': [True, True]}, dtype=object)
         assert_frame_equal(result, expected)

     def test_neg(self):
@@ -658,13 +668,13 @@ def test_neg(self):
         assert_panel_equal(-self.panel, -1 * self.panel)

     def test_invert(self):
-        assert_panel_equal(-(self.panel < 0), ~(self.panel <0))
+        assert_panel_equal(-(self.panel < 0), ~(self.panel < 0))

     def test_comparisons(self):
         p1 = tm.makePanel()
         p2 = tm.makePanel()

-        tp = p1.reindex(items = p1.items + ['foo'])
+        tp = p1.reindex(items=p1.items + ['foo'])
         df = p1[p1.items[0]]

         def test_comp(func):
@@ -719,12 +729,14 @@ def test_set_value(self):
 _panel = tm.makePanel()
 tm.add_nans(_panel)

+
 class TestPanel(unittest.TestCase, PanelTests, CheckIndexing,
                 SafeForLongAndSparse,
                 SafeForSparse):
     _multiprocess_can_split_ = True

+
     @classmethod
-    def assert_panel_equal(cls,x, y):
+    def assert_panel_equal(cls, x, y):
         assert_panel_equal(x, y)

     def setUp(self):
@@ -743,8 +755,8 @@ def test_constructor(self):
         assert_panel_equal(wp, self.panel)

         # strings handled prop
-        wp = Panel([[['foo', 'foo', 'foo',],
-                     ['foo', 'foo', 'foo']]])
+        wp = Panel([[['foo', 'foo', 'foo', ],
+                     ['foo', 'foo', 'foo']]])
         self.assert_(wp.values.dtype == np.object_)

         vals = self.panel.values
@@ -796,14 +808,14 @@ def test_ctor_dict(self):
         itema = self.panel['ItemA']
         itemb = self.panel['ItemB']

-        d = {'A' : itema, 'B' : itemb[5:]}
-        d2 = {'A' : itema._series, 'B' : itemb[5:]._series}
-        d3 = {'A' : None,
-              'B' : DataFrame(itemb[5:]._series),
-              'C' : DataFrame(itema._series)}
+        d = {'A': itema, 'B': itemb[5:]}
+        d2 = {'A': itema._series, 'B': itemb[5:]._series}
+        d3 = {'A': None,
+              'B': DataFrame(itemb[5:]._series),
+              'C': DataFrame(itema._series)}

         wp = Panel.from_dict(d)
-        wp2 = Panel.from_dict(d2) # nested Dict
+        wp2 = Panel.from_dict(d2)  # nested Dict
         wp3 = Panel.from_dict(d3)
         self.assert_(wp.major_axis.equals(self.panel.major_axis))
         assert_panel_equal(wp, wp2)
@@ -818,9 +830,9 @@ def test_ctor_dict(self):
         assert_panel_equal(Panel(d3), Panel.from_dict(d3))

         # a pathological case
-        d4 = { 'A' : None, 'B' : None }
+        d4 = {'A': None, 'B': None}
         wp4 = Panel.from_dict(d4)
-        assert_panel_equal(Panel(d4), Panel(items = ['A','B']))
+        assert_panel_equal(Panel(d4), Panel(items=['A', 'B']))

         # cast
         dcasted = dict((k, v.reindex(wp.major_axis).fillna(0))
@@ -879,8 +891,8 @@ def test_from_dict_mixed_orient(self):
         df = tm.makeDataFrame()
         df['foo'] = 'bar'

-        data = {'k1' : df,
-                'k2' : df}
+        data = {'k1': df,
+                'k2': df}

         panel = Panel.from_dict(data, orient='minor')
@@ -1174,12 +1186,12 @@ def test_shift(self):
         assert_panel_equal(result, expected)

     def test_multiindex_get(self):
-        ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b',2)],
+        ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)],
                                      names=['first', 'second'])
-        wp = Panel(np.random.random((4,5,5)),
-                   items=ind,
-                   major_axis=np.arange(5),
-                   minor_axis=np.arange(5))
+        wp = Panel(np.random.random((4, 5, 5)),
+                   items=ind,
+                   major_axis=np.arange(5),
+                   minor_axis=np.arange(5))
         f1 = wp['a']
         f2 = wp.ix['a']
         assert_panel_equal(f1, f2)
@@ -1198,7 +1210,7 @@ def test_multiindex_blocks(self):
         f1 = wp['a']
         self.assert_((f1.items == [1, 2]).all())

-        f1 = wp[('b',1)]
+        f1 = wp[('b', 1)]
         self.assert_((f1.columns == ['A', 'B', 'C', 'D']).all())

     def test_repr_empty(self):
@@ -1207,9 +1219,9 @@ def test_repr_empty(self):

     def test_rename(self):
         mapper = {
-            'ItemA' : 'foo',
-            'ItemB' : 'bar',
-            'ItemC' : 'baz'
+            'ItemA': 'foo',
+            'ItemB': 'bar',
+            'ItemC': 'baz'
         }

         renamed = self.panel.rename_axis(mapper, axis=0)
@@ -1239,14 +1251,14 @@ def test_group_agg(self):
         assert(agged[2][0] == 4.5)

         # test a function that doesn't aggregate
-        f2 = lambda x: np.zeros((2,2))
+        f2 = lambda x: np.zeros((2, 2))
         self.assertRaises(Exception, group_agg, values, bounds, f2)

     def test_from_frame_level1_unsorted(self):
         tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2),
                   ('AAPL', 1), ('MSFT', 1)]
         midx = MultiIndex.from_tuples(tuples)
-        df = DataFrame(np.random.rand(5,4), index=midx)
+        df = DataFrame(np.random.rand(5, 4), index=midx)
         p = df.to_panel()
         assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index())
@@ -1265,7 +1277,7 @@ def test_to_excel(self):
         self.panel.to_excel(path)
         reader = ExcelFile(path)
         for item, df in self.panel.iterkv():
-            recdf = reader.parse(str(item),index_col=0)
+            recdf = reader.parse(str(item), index_col=0)
             assert_frame_equal(df, recdf)
         os.remove(path)
@@ -1305,21 +1317,21 @@ def test_update(self):
                      [1.5, np.nan, 3.],
                      [1.5, np.nan, 3.],
                      [1.5, np.nan, 3.]],
-                    [[1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.]]])
+                    [[1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.]]])

         other = Panel([[[3.6, 2., np.nan],
-                       [np.nan, np.nan, 7]]], items=[1])
+                        [np.nan, np.nan, 7]]], items=[1])

         pan.update(other)

         expected = Panel([[[1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.]],
-                          [[3.6, 2., 3],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[3.6, 2., 3],
                            [1.5, np.nan, 7],
                            [1.5, np.nan, 3.],
                            [1.5, np.nan, 3.]]])
@@ -1328,27 +1340,27 @@ def test_update_from_dict(self):
         pan = Panel({'one': DataFrame([[1.5, np.nan, 3],
-                                        [1.5, np.nan, 3],
-                                        [1.5, np.nan, 3.],
-                                        [1.5, np.nan, 3.]]),
-                     'two': DataFrame([[1.5, np.nan, 3.],
-                                        [1.5, np.nan, 3.],
-                                        [1.5, np.nan, 3.],
-                                        [1.5, np.nan, 3.]])})
+                                       [1.5, np.nan, 3],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.]]),
+                     'two': DataFrame([[1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.]])})

         other = {'two': DataFrame([[3.6, 2., np.nan],
-                                   [np.nan, np.nan, 7]])}
+                                   [np.nan, np.nan, 7]])}

         pan.update(other)

         expected = Panel({'two': DataFrame([[3.6, 2., 3],
-                                             [1.5, np.nan, 7],
-                                             [1.5, np.nan, 3.],
-                                             [1.5, np.nan, 3.]]),
+                                            [1.5, np.nan, 7],
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.]]),
                           'one': DataFrame([[1.5, np.nan, 3.],
-                                             [1.5, np.nan, 3.],
-                                             [1.5, np.nan, 3.],
-                                             [1.5, np.nan, 3.]])})
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.]])})

         assert_panel_equal(pan, expected)
@@ -1357,10 +1369,10 @@ def test_update_nooverwrite(self):
                      [1.5, np.nan, 3.],
                      [1.5, np.nan, 3.],
                      [1.5, np.nan, 3.]],
-                    [[1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.]]])
+                    [[1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.]]])

         other = Panel([[[3.6, 2., np.nan],
                         [np.nan, np.nan, 7]]], items=[1])

         pan.update(other, overwrite=False)

         expected = Panel([[[1.5, np.nan, 3],
-                          [1.5, np.nan, 3],
-                          [1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.]],
-                          [[1.5, 2., 3.],
+                           [1.5, np.nan, 3],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[1.5, 2., 3.],
                            [1.5, np.nan, 3.],
                            [1.5, np.nan, 3.],
                            [1.5, np.nan, 3.]]])
@@ -1383,21 +1395,21 @@ def test_update_filtered(self):
                     [1.5, np.nan, 3.],
                     [1.5, np.nan, 3.],
                     [1.5, np.nan, 3.]],
-                    [[1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.]]])
+                    [[1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.]]])

         other = Panel([[[3.6, 2., np.nan],
-                       [np.nan, np.nan, 7]]], items=[1])
+                        [np.nan, np.nan, 7]]], items=[1])

         pan.update(other, filter_func=lambda x: x > 2)

         expected = Panel([[[1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.],
-                          [1.5, np.nan, 3.]],
-                          [[1.5, np.nan, 3],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[1.5, np.nan, 3],
                            [1.5, np.nan, 7],
                            [1.5, np.nan, 3.],
                            [1.5, np.nan, 3.]]])
@@ -1409,19 +1421,21 @@ def test_update_raise(self):
                     [1.5, np.nan, 3.],
                     [1.5, np.nan, 3.],
                     [1.5, np.nan, 3.]],
-                    [[1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.],
-                    [1.5, np.nan, 3.]]])
+                    [[1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.],
+                     [1.5, np.nan, 3.]]])

         np.testing.assert_raises(Exception, pan.update, *(pan,),
-            **{'raise_conflict': True})
+                                 **{'raise_conflict': True})

+
 class TestLongPanel(unittest.TestCase):
     """
     LongPanel no longer exists, but...
     """
     _multiprocess_can_split_ = True

+
     def setUp(self):
         panel = tm.makePanel()
         tm.add_nans(panel)
@@ -1546,13 +1560,13 @@ def test_axis_dummies(self):
         self.assertEqual(len(major_dummies.columns),
                          len(self.panel.index.levels[0]))

-        mapping = {'A' : 'one',
-                   'B' : 'one',
-                   'C' : 'two',
-                   'D' : 'two'}
+        mapping = {'A': 'one',
+                   'B': 'one',
+                   'C': 'two',
+                   'D': 'two'}

         transformed = make_axis_dummies(self.panel, 'minor',
-                                       transform=mapping.get)
+                                        transform=mapping.get)
         self.assertEqual(len(transformed.columns), 2)
         self.assert_(np.array_equal(transformed.columns, ['one', 'two']))
@@ -1633,6 +1647,7 @@ def test_pivot(self):
         # corner case, empty
         df = pivot(np.array([]), np.array([]), np.array([]))

+
def test_monotonic():
     pos = np.array([1, 2, 3, 5])

@@ -1646,13 +1661,14 @@ def test_monotonic():

     assert not panelm._monotonic(neg2)

+
def test_panel_index():
-    index = panelm.panel_index([1,2,3,4], [1,2,3])
-    expected = MultiIndex.from_arrays([np.tile([1,2,3,4], 3),
-                                       np.repeat([1,2,3], 4)])
+    index = panelm.panel_index([1, 2, 3, 4], [1, 2, 3])
+    expected = MultiIndex.from_arrays([np.tile([1, 2, 3, 4], 3),
+                                       np.repeat([1, 2, 3], 4)])
     assert(index.equals(expected))

 if __name__ == '__main__':
     import nose
-    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 088e9a9fa137c..e0180f475ca45 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -24,11 +24,13 @@
                            assert_almost_equal)
 import pandas.util.testing as tm

+
 def add_nans(panel4d):
     for l, label in enumerate(panel4d.labels):
         panel = panel4d[label]
         tm.add_nans(panel)

+
 class SafeForLongAndSparse(object):
     _multiprocess_can_split_ = True

@@ -139,6 +141,7 @@ def wrapper(x):

         self.assertRaises(Exception, f, axis=obj.ndim)

+
 class SafeForSparse(object):
     _multiprocess_can_split_ = True

@@ -159,9 +162,9 @@ def test_get_axis(self):

     def test_set_axis(self):
         new_labels = Index(np.arange(len(self.panel4d.labels)))
-        new_items  = Index(np.arange(len(self.panel4d.items)))
-        new_major  = Index(np.arange(len(self.panel4d.major_axis)))
-        new_minor  = Index(np.arange(len(self.panel4d.minor_axis)))
+        new_items = Index(np.arange(len(self.panel4d.items)))
+        new_major = Index(np.arange(len(self.panel4d.major_axis)))
+        new_minor = Index(np.arange(len(self.panel4d.minor_axis)))

         # ensure propagate to potentially prior-cached items too
         label = self.panel4d['l1']
@@ -236,7 +239,7 @@ def test_select(self):

         # select labels
         result = p.select(lambda x: x in ('l1', 'l3'), axis='labels')
-        expected = p.reindex(labels=['l1','l3'])
+        expected = p.reindex(labels=['l1', 'l3'])
         self.assert_panel4d_equal(result, expected)

         # select items
@@ -282,6 +285,7 @@ def test_abs(self):
             expected = np.abs(df)
             assert_series_equal(result, expected)

+
 class CheckIndexing(object):
     _multiprocess_can_split_ = True

@@ -291,7 +295,7 @@ def test_getitem(self):
         self.assertRaises(Exception, self.panel4d.__getitem__, 'ItemQ')

     def test_delitem_and_pop(self):
         expected = self.panel4d['l2']
-        result   = self.panel4d.pop('l2')
+        result = self.panel4d.pop('l2')
         assert_panel_equal(expected, result)
         self.assert_('l2' not in self.panel4d.labels)
@@ -335,20 +339,21 @@ def test_delitem_and_pop(self):

     def test_setitem(self):

         ## LongPanel with one item
-        #lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
-        #self.assertRaises(Exception, self.panel.__setitem__,
+        # lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
+        # self.assertRaises(Exception, self.panel.__setitem__,
         #                  'ItemE', lp)

         # Panel
-        p = Panel(dict(ItemA = self.panel4d['l1']['ItemA'][2:].filter(items=['A', 'B'])))
+        p = Panel(dict(
+            ItemA=self.panel4d['l1']['ItemA'][2:].filter(items=['A', 'B'])))
         self.panel4d['l4'] = p
         self.panel4d['l5'] = p

         p2 = self.panel4d['l4']

-        assert_panel_equal(p, p2.reindex(items = p.items,
-                                         major_axis = p.major_axis,
-                                         minor_axis = p.minor_axis))
+        assert_panel_equal(p, p2.reindex(items=p.items,
+                                         major_axis=p.major_axis,
+                                         minor_axis=p.minor_axis))

         # scalar
         self.panel4d['lG'] = 1
@@ -368,8 +373,8 @@ def test_comparisons(self):
         p1 = tm.makePanel4D()
         p2 = tm.makePanel4D()

-        tp = p1.reindex(labels = p1.labels + ['foo'])
-        p  = p1[p1.labels[0]]
+        tp = p1.reindex(labels=p1.labels + ['foo'])
+        p = p1[p1.labels[0]]

         def test_comp(func):
             result = func(p1, p2)
@@ -413,7 +418,7 @@ def test_major_xs(self):
         ref = self.panel4d['l1']['ItemA']

         idx = self.panel4d.major_axis[5]
-        xs  = self.panel4d.major_xs(idx)
+        xs = self.panel4d.major_xs(idx)

         assert_series_equal(xs['l1'].T['ItemA'], ref.xs(idx))
@@ -468,9 +473,9 @@ def test_getitem_fancy_labels(self):
         panel4d = self.panel4d

         labels = panel4d.labels[[1, 0]]
-        items  = panel4d.items[[1, 0]]
-        dates  = panel4d.major_axis[::2]
-        cols   = ['D', 'C', 'F']
+        items = panel4d.items[[1, 0]]
+        dates = panel4d.major_axis[::2]
+        cols = ['D', 'C', 'F']

         # all 4 specified
         assert_panel4d_equal(panel4d.ix[labels, items, dates, cols],
@@ -508,15 +513,16 @@ def test_getitem_fancy_ints(self):

     def test_getitem_fancy_xs(self):
         raise nose.SkipTest
-        #self.assertRaises(NotImplementedError, self.panel4d.major_xs)
-        #self.assertRaises(NotImplementedError, self.panel4d.minor_xs)
+        # self.assertRaises(NotImplementedError, self.panel4d.major_xs)
+        # self.assertRaises(NotImplementedError, self.panel4d.minor_xs)

     def test_get_value(self):
         for label in self.panel4d.labels:
             for item in self.panel4d.items:
                 for mjr in self.panel4d.major_axis[::2]:
                     for mnr in self.panel4d.minor_axis:
-                        result = self.panel4d.get_value(label, item, mjr, mnr)
+                        result = self.panel4d.get_value(
+                            label, item, mjr, mnr)
                         expected = self.panel4d[label][item][mnr][mjr]
                         assert_almost_equal(result, expected)

@@ -526,7 +532,8 @@ def test_set_value(self):
             for mjr in self.panel4d.major_axis[::2]:
                 for mnr in self.panel4d.minor_axis:
                     self.panel4d.set_value(label, item, mjr, mnr, 1.)
-                    assert_almost_equal(self.panel4d[label][item][mnr][mjr], 1.)
+                    assert_almost_equal(
+                        self.panel4d[label][item][mnr][mjr], 1.)
# resize res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5) @@ -537,13 +544,14 @@ def test_set_value(self): res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) self.assert_(com.is_float_dtype(res3['l4'].values)) + class TestPanel4d(unittest.TestCase, CheckIndexing, SafeForSparse, SafeForLongAndSparse): _multiprocess_can_split_ = True @classmethod - def assert_panel4d_equal(cls,x, y): + def assert_panel4d_equal(cls, x, y): assert_panel4d_equal(x, y) def setUp(self): @@ -560,9 +568,9 @@ def test_constructor(self): assert_panel4d_equal(panel4d, self.panel4d) # strings handled prop - #panel4d = Panel4D([[['foo', 'foo', 'foo',], + # panel4d = Panel4D([[['foo', 'foo', 'foo',], # ['foo', 'foo', 'foo']]]) - #self.assert_(wp.values.dtype == np.object_) + # self.assert_(wp.values.dtype == np.object_) vals = self.panel4d.values @@ -613,34 +621,35 @@ def test_ctor_dict(self): l1 = self.panel4d['l1'] l2 = self.panel4d['l2'] - d = {'A' : l1, 'B' : l2.ix[['ItemB'],:,:] } - #d2 = {'A' : itema._series, 'B' : itemb[5:]._series} - #d3 = {'A' : DataFrame(itema._series), + d = {'A': l1, 'B': l2.ix[['ItemB'], :, :]} + # d2 = {'A' : itema._series, 'B' : itemb[5:]._series} + # d3 = {'A' : DataFrame(itema._series), # 'B' : DataFrame(itemb[5:]._series)} panel4d = Panel4D(d) - #wp2 = Panel.from_dict(d2) # nested Dict - #wp3 = Panel.from_dict(d3) - #self.assert_(wp.major_axis.equals(self.panel.major_axis)) + # wp2 = Panel.from_dict(d2) # nested Dict + # wp3 = Panel.from_dict(d3) + # self.assert_(wp.major_axis.equals(self.panel.major_axis)) assert_panel_equal(panel4d['A'], self.panel4d['l1']) - assert_frame_equal(panel4d.ix['B','ItemB',:,:], self.panel4d.ix['l2',['ItemB'],:,:]['ItemB']) + assert_frame_equal(panel4d.ix['B', 'ItemB', :, :], + self.panel4d.ix['l2', ['ItemB'], :, :]['ItemB']) # intersect - #wp = Panel.from_dict(d, intersect=True) - #self.assert_(wp.major_axis.equals(itemb.index[5:])) + # wp = Panel.from_dict(d, intersect=True) + # self.assert_(wp.major_axis.equals(itemb.index[5:])) # use constructor - #assert_panel_equal(Panel(d), Panel.from_dict(d)) - #assert_panel_equal(Panel(d2), Panel.from_dict(d2)) - #assert_panel_equal(Panel(d3), Panel.from_dict(d3)) + # assert_panel_equal(Panel(d), Panel.from_dict(d)) + # assert_panel_equal(Panel(d2), Panel.from_dict(d2)) + # assert_panel_equal(Panel(d3), Panel.from_dict(d3)) # cast - #dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) + # dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) # for k, v in d.iteritems()) - #result = Panel(dcasted, dtype=int) - #expected = Panel(dict((k, v.astype(int)) + # result = Panel(dcasted, dtype=int) + # expected = Panel(dict((k, v.astype(int)) # for k, v in dcasted.iteritems())) - #assert_panel_equal(result, expected) + # assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): data = dict((k, v.values) for k, v in self.panel4d.iterkv()) @@ -649,10 +658,10 @@ def test_constructor_dict_mixed(self): self.assert_(result.major_axis.equals(exp_major)) result = Panel4D(data, - labels = self.panel4d.labels, - items = self.panel4d.items, - major_axis = self.panel4d.major_axis, - minor_axis = self.panel4d.minor_axis) + labels=self.panel4d.labels, + items=self.panel4d.items, + major_axis=self.panel4d.major_axis, + minor_axis=self.panel4d.minor_axis) assert_panel4d_equal(result, self.panel4d) data['l2'] = self.panel4d['l2'] @@ -667,14 +676,16 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel4D, data) def test_constructor_resize(self): - data = 
self.panel4d._data - labels= self.panel4d.labels[:-1] + data = self.panel4d._data + labels = self.panel4d.labels[:-1] items = self.panel4d.items[:-1] major = self.panel4d.major_axis[:-1] minor = self.panel4d.minor_axis[:-1] - result = Panel4D(data, labels=labels, items=items, major_axis=major, minor_axis=minor) - expected = self.panel4d.reindex(labels=labels, items=items, major=major, minor=minor) + result = Panel4D(data, labels=labels, items=items, + major_axis=major, minor_axis=minor) + expected = self.panel4d.reindex( + labels=labels, items=items, major=major, minor=minor) assert_panel4d_equal(result, expected) result = Panel4D(data, items=items, major_axis=major) @@ -718,7 +729,7 @@ def test_reindex(self): ref = self.panel4d['l2'] # labels - result = self.panel4d.reindex(labels=['l1','l2']) + result = self.panel4d.reindex(labels=['l1', 'l2']) assert_panel_equal(result['l2'], ref) # items @@ -728,7 +739,8 @@ def test_reindex(self): # major new_major = list(self.panel4d.major_axis[:10]) result = self.panel4d.reindex(major=new_major) - assert_frame_equal(result['l2']['ItemB'], ref['ItemB'].reindex(index=new_major)) + assert_frame_equal( + result['l2']['ItemB'], ref['ItemB'].reindex(index=new_major)) # raise exception put both major and major_axis self.assertRaises(Exception, self.panel4d.reindex, @@ -737,12 +749,13 @@ def test_reindex(self): # minor new_minor = list(self.panel4d.minor_axis[:2]) result = self.panel4d.reindex(minor=new_minor) - assert_frame_equal(result['l2']['ItemB'], ref['ItemB'].reindex(columns=new_minor)) + assert_frame_equal( + result['l2']['ItemB'], ref['ItemB'].reindex(columns=new_minor)) result = self.panel4d.reindex(labels=self.panel4d.labels, - items =self.panel4d.items, - major =self.panel4d.major_axis, - minor =self.panel4d.minor_axis) + items=self.panel4d.items, + major=self.panel4d.major_axis, + minor=self.panel4d.minor_axis) assert(result.labels is self.panel4d.labels) assert(result.items is self.panel4d.items) @@ -758,19 +771,20 @@ def test_reindex(self): larger = smaller.reindex(major=self.panel4d.major_axis, method='pad') - assert_panel_equal(larger.ix[:,:,self.panel4d.major_axis[1],:], - smaller.ix[:,:,smaller_major[0],:]) + assert_panel_equal(larger.ix[:, :, self.panel4d.major_axis[1], :], + smaller.ix[:, :, smaller_major[0], :]) # don't necessarily copy - result = self.panel4d.reindex(major=self.panel4d.major_axis, copy=False) + result = self.panel4d.reindex( + major=self.panel4d.major_axis, copy=False) self.assert_(result is self.panel4d) def test_reindex_like(self): # reindex_like smaller = self.panel4d.reindex(labels=self.panel4d.labels[:-1], - items =self.panel4d.items[:-1], - major =self.panel4d.major_axis[:-1], - minor =self.panel4d.minor_axis[:-1]) + items=self.panel4d.items[:-1], + major=self.panel4d.major_axis[:-1], + minor=self.panel4d.minor_axis[:-1]) smaller_like = self.panel4d.reindex_like(smaller) assert_panel4d_equal(smaller, smaller_like) @@ -793,7 +807,7 @@ def test_take(self): def test_sort_index(self): import random - rlabels= list(self.panel4d.labels) + rlabels = list(self.panel4d.labels) ritems = list(self.panel4d.items) rmajor = list(self.panel4d.major_axis) rminor = list(self.panel4d.minor_axis) @@ -807,18 +821,18 @@ def test_sort_index(self): assert_panel4d_equal(sorted_panel4d, self.panel4d) # descending - #random_order = self.panel.reindex(items=ritems) - #sorted_panel = random_order.sort_index(axis=0, ascending=False) - #assert_panel_equal(sorted_panel, + # random_order = self.panel.reindex(items=ritems) + # sorted_panel = 
random_order.sort_index(axis=0, ascending=False) + # assert_panel_equal(sorted_panel, # self.panel.reindex(items=self.panel.items[::-1])) - #random_order = self.panel.reindex(major=rmajor) - #sorted_panel = random_order.sort_index(axis=1) - #assert_panel_equal(sorted_panel, self.panel) + # random_order = self.panel.reindex(major=rmajor) + # sorted_panel = random_order.sort_index(axis=1) + # assert_panel_equal(sorted_panel, self.panel) - #random_order = self.panel.reindex(minor=rminor) - #sorted_panel = random_order.sort_index(axis=2) - #assert_panel_equal(sorted_panel, self.panel) + # random_order = self.panel.reindex(minor=rminor) + # sorted_panel = random_order.sort_index(axis=2) + # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): filled = self.panel4d.fillna(0) @@ -840,10 +854,10 @@ def test_fillna(self): assert_panel4d_equal(filled, empty) def test_swapaxes(self): - result = self.panel4d.swapaxes('labels','items') + result = self.panel4d.swapaxes('labels', 'items') self.assert_(result.items is self.panel4d.labels) - result = self.panel4d.swapaxes('labels','minor') + result = self.panel4d.swapaxes('labels', 'minor') self.assert_(result.labels is self.panel4d.minor_axis) result = self.panel4d.swapaxes('items', 'minor') @@ -985,9 +999,9 @@ def test_repr_empty(self): def test_rename(self): mapper = { - 'l1' : 'foo', - 'l2' : 'bar', - 'l3' : 'baz' + 'l1': 'foo', + 'l2': 'bar', + 'l3': 'baz' } renamed = self.panel4d.rename_axis(mapper, axis=0) @@ -1017,7 +1031,7 @@ def test_group_agg(self): assert(agged[2][0] == 4.5) # test a function that doesn't aggregate - f2 = lambda x: np.zeros((2,2)) + f2 = lambda x: np.zeros((2, 2)) self.assertRaises(Exception, group_agg, values, bounds, f2) def test_from_frame_level1_unsorted(self): @@ -1050,6 +1064,6 @@ def test_to_excel(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure', + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '--with-timer'], exit=False) diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 1debfd54aac3c..5675cfec58678 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -18,6 +18,7 @@ assert_almost_equal) import pandas.util.testing as tm + class TestPanelnd(unittest.TestCase): def setUp(self): @@ -27,85 +28,84 @@ def test_4d_construction(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( - klass_name = 'Panel4D', - axis_orders = ['labels','items','major_axis','minor_axis'], - axis_slices = { 'items' : 'items', 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = Panel, - axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, - stat_axis = 2) + klass_name='Panel4D', + axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], + axis_slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer=Panel, + axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis=2) - p4d = Panel4D(dict(L1 = tm.makePanel(), L2 = tm.makePanel())) + p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) def test_4d_construction_alt(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( - klass_name = 'Panel4D', - axis_orders = ['labels','items','major_axis','minor_axis'], - axis_slices = { 'items' : 'items', 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = 'Panel', - axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, - stat_axis = 2) + klass_name='Panel4D', + 
axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], + axis_slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer='Panel', + axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis=2) - p4d = Panel4D(dict(L1 = tm.makePanel(), L2 = tm.makePanel())) + p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) def test_4d_construction_error(self): # create a 4D self.assertRaises(Exception, panelnd.create_nd_panel_factory, - klass_name = 'Panel4D', - axis_orders = ['labels', 'items', 'major_axis', - 'minor_axis'], - axis_slices = { 'items' : 'items', - 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = 'foo', - axis_aliases = { 'major' : 'major_axis', - 'minor' : 'minor_axis' }, - stat_axis = 2) - + klass_name='Panel4D', + axis_orders=['labels', 'items', 'major_axis', + 'minor_axis'], + axis_slices={'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer='foo', + axis_aliases={'major': 'major_axis', + 'minor': 'minor_axis'}, + stat_axis=2) def test_5d_construction(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( - klass_name = 'Panel4D', - axis_orders = ['labels1','items','major_axis','minor_axis'], - axis_slices = { 'items' : 'items', 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = Panel, - axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, - stat_axis = 2) + klass_name='Panel4D', + axis_orders=['labels1', 'items', 'major_axis', 'minor_axis'], + axis_slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer=Panel, + axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis=2) - p4d = Panel4D(dict(L1 = tm.makePanel(), L2 = tm.makePanel())) + p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) # create a 5D Panel5D = panelnd.create_nd_panel_factory( - klass_name = 'Panel5D', - axis_orders = [ 'cool1', 'labels1', 'items', 'major_axis', - 'minor_axis'], - axis_slices = { 'labels1' : 'labels1', 'items' : 'items', - 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' }, - slicer = Panel4D, - axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, - stat_axis = 2) - - p5d = Panel5D(dict(C1 = p4d)) + klass_name='Panel5D', + axis_orders=['cool1', 'labels1', 'items', 'major_axis', + 'minor_axis'], + axis_slices={'labels1': 'labels1', 'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer=Panel4D, + axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis=2) + + p5d = Panel5D(dict(C1=p4d)) # slice back to 4d - results = p5d.ix['C1',:,:,0:3,:] - expected = p4d.ix[:,:,0:3,:] + results = p5d.ix['C1', :, :, 0:3, :] + expected = p4d.ix[:, :, 0:3, :] assert_panel_equal(results['L1'], expected['L1']) # test a transpose - #results = p5d.transpose(1,2,3,4,0) - #expected = + # results = p5d.transpose(1,2,3,4,0) + # expected = if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index a921518c25c95..278e745c7d312 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -19,6 +19,7 @@ _multiprocess_can_split_ = True + def test_melt(): df = tm.makeTimeDataFrame()[:10] df['id1'] = (df['A'] > 0).astype(int) @@ -32,30 +33,32 @@ def test_melt(): molten5 = melt(df, id_vars=['id1', 'id2'], 
value_vars=['A', 'B']) + def test_convert_dummies(): - df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B' : ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C' : np.random.randn(8), - 'D' : np.random.randn(8)}) + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) result = convert_dummies(df, ['A', 'B']) result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') - expected = DataFrame({'A_foo' : [1, 0, 1, 0, 1, 0, 1, 1], - 'A_bar' : [0, 1, 0, 1, 0, 1, 0, 0], - 'B_one' : [1, 1, 0, 0, 0, 0, 1, 0], - 'B_two' : [0, 0, 1, 0, 1, 1, 0, 0], - 'B_three' : [0, 0, 0, 1, 0, 0, 0, 1], - 'C' : df['C'].values, - 'D' : df['D'].values}, + expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], + 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], + 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], + 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], + 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], + 'C': df['C'].values, + 'D': df['D'].values}, columns=result.columns, dtype=float) expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected2) + class Test_lreshape(unittest.TestCase): def test_pairs(self): @@ -130,5 +133,5 @@ def test_pairs(self): if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 0ac32371cdad7..335d747e960fb 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -27,6 +27,7 @@ from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm + def _skip_if_no_scipy(): try: import scipy.stats @@ -39,6 +40,7 @@ def _skip_if_no_scipy(): JOIN_TYPES = ['inner', 'outer', 'left', 'right'] + class CheckNameIntegration(object): _multiprocess_can_split_ = True @@ -109,7 +111,7 @@ def test_multilevel_name_print(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - s = Series(range(0,len(index)), index=index, name='sth') + s = Series(range(0, len(index)), index=index, name='sth') expected = ["first second", "foo one 0", " two 1", @@ -146,7 +148,7 @@ def test_name_printing(self): s.name = None self.assert_(not "Name:" in repr(s)) # test big series (diff code path) - s = Series(range(0,1000)) + s = Series(range(0, 1000)) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None @@ -174,6 +176,7 @@ def test_to_sparse_pass_name(self): result = self.ts.to_sparse() self.assertEquals(result.name, self.ts.name) + class TestNanops(unittest.TestCase): _multiprocess_can_split_ = True @@ -216,15 +219,17 @@ def test_sum_zero(self): self.assert_((df.sum(1) == 0).all()) def test_nansum_buglet(self): - s = Series([1.0, np.nan], index=[0,1]) + s = Series([1.0, np.nan], index=[0, 1]) result = np.nansum(s) assert_almost_equal(result, 1) + class SafeForSparse(object): pass _ts = tm.makeTimeSeries() + class TestSeries(unittest.TestCase, CheckNameIntegration): _multiprocess_can_split_ = True @@ -354,31 +359,31 @@ def test_constructor_dtype_nocast(self): def test_constructor_dtype_datetime64(self): import pandas.tslib as tslib - s = Series(tslib.iNaT,dtype='M8[ns]',index=range(5)) + s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) 
self.assert_(isnull(s).all() == True) - s = Series(tslib.NaT,dtype='M8[ns]',index=range(5)) + s = Series(tslib.NaT, dtype='M8[ns]', index=range(5)) self.assert_(isnull(s).all() == True) - s = Series(nan,dtype='M8[ns]',index=range(5)) + s = Series(nan, dtype='M8[ns]', index=range(5)) self.assert_(isnull(s).all() == True) - s = Series([ datetime(2001,1,2,0,0), tslib.iNaT ],dtype='M8[ns]') + s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') self.assert_(isnull(s[1]) == True) self.assert_(s.dtype == 'M8[ns]') - s = Series([ datetime(2001,1,2,0,0), nan ],dtype='M8[ns]') + s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') self.assert_(isnull(s[1]) == True) self.assert_(s.dtype == 'M8[ns]') def test_constructor_dict(self): - d = {'a' : 0., 'b' : 1., 'c' : 2.} + d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a']) assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) - d = {pidx[0] : 0, pidx[1] : 1} + d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, index=pidx) expected = Series(np.nan, pidx) expected.ix[0] = 0 @@ -407,20 +412,20 @@ def test_constructor_set(self): self.assertRaises(TypeError, Series, values) def test_fromDict(self): - data = {'a' : 0, 'b' : 1, 'c' : 2, 'd' : 3} + data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} series = Series(data) self.assert_(tm.is_sorted(series.index)) - data = {'a' : 0, 'b' : '1', 'c' : '2', 'd' : datetime.now()} + data = {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()} series = Series(data) self.assert_(series.dtype == np.object_) - data = {'a' : 0, 'b' : '1', 'c' : '2', 'd' : '3'} + data = {'a': 0, 'b': '1', 'c': '2', 'd': '3'} series = Series(data) self.assert_(series.dtype == np.object_) - data = {'a' : '0', 'b' : '1'} + data = {'a': '0', 'b': '1'} series = Series(data, dtype=float) self.assert_(series.dtype == np.float64) @@ -470,7 +475,7 @@ def _check_all_orients(series, dtype=None): _check_all_orients(self.ts) # dtype - s = Series(range(6), index=['a','b','c','d','e','f']) + s = Series(range(6), index=['a', 'b', 'c', 'd', 'e', 'f']) _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) _check_all_orients(Series(s, dtype=np.int), dtype=np.int) @@ -596,8 +601,8 @@ def test_getitem_int64(self): self.assertEqual(self.ts[idx], self.ts[5]) def test_getitem_fancy(self): - slice1 = self.series[[1,2,3]] - slice2 = self.objSeries[[1,2,3]] + slice1 = self.series[[1, 2, 3]] + slice2 = self.objSeries[[1, 2, 3]] self.assertEqual(self.series.index[2], slice1.index[1]) self.assertEqual(self.objSeries.index[2], slice2.index[1]) self.assertEqual(self.series[2], slice1[1]) @@ -673,7 +678,7 @@ def test_getitem_out_of_bounds(self): def test_getitem_setitem_integers(self): # caused bug without test - s = Series([1,2,3], ['a','b','c']) + s = Series([1, 2, 3], ['a', 'b', 'c']) self.assertEqual(s.ix[0], s['a']) s.ix[0] = 5 @@ -700,7 +705,7 @@ def test_setitem_ambiguous_keyerror(self): def test_setitem_float_labels(self): # note labels are floats - s = Series(['a','b','c'],index=[0,0.5,1]) + s = Series(['a', 'b', 'c'], index=[0, 0.5, 1]) tmp = s.copy() s.ix[1] = 'zoo' @@ -723,7 +728,7 @@ def test_slice(self): self.assertEqual(numSlice.index[1], self.series.index[11]) self.assert_(tm.equalContents(numSliceEnd, - np.array(self.series)[-10:])) + np.array(self.series)[-10:])) # test return view sl = self.series[10:20] @@ -732,7 +737,7 @@ def test_slice(self): def test_slice_can_reorder_not_uniquely_indexed(self): s = Series(1, index=['a', 'a', 
'b', 'b', 'c']) - result = s[::-1] # it works! + result = s[::-1] # it works! def test_slice_float_get_set(self): result = self.ts[4.0:10.0] @@ -746,12 +751,12 @@ def test_slice_float_get_set(self): self.assertRaises(TypeError, self.ts.__setitem__, slice(4.5, 10.0), 0) def test_slice_floats2(self): - s = Series(np.random.rand(10), index=np.arange(10,20,dtype=float)) + s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) self.assert_(len(s.ix[12.0:]) == 8) self.assert_(len(s.ix[12.5:]) == 7) - i = np.arange(10,20,dtype=float) + i = np.arange(10, 20, dtype=float) i[2] = 12.2 s.index = i self.assert_(len(s.ix[12.0:]) == 8) @@ -783,7 +788,7 @@ def test_slice_float64(self): def test_setitem(self): self.ts[self.ts.index[5]] = np.NaN - self.ts[[1,2,17]] = np.NaN + self.ts[[1, 2, 17]] = np.NaN self.ts[6] = np.NaN self.assert_(np.isnan(self.ts[6])) self.assert_(np.isnan(self.ts[2])) @@ -841,14 +846,14 @@ def test_reshape_non_2d(self): def test_reshape_2d_return_array(self): x = Series(np.random.random(201), name='x') - result = x.reshape((-1,1)) + result = x.reshape((-1, 1)) self.assert_(not isinstance(result, Series)) - result2 = np.reshape(x, (-1,1)) + result2 = np.reshape(x, (-1, 1)) self.assert_(not isinstance(result, Series)) result = x[:, None] - expected = x.reshape((-1,1)) + expected = x.reshape((-1, 1)) assert_almost_equal(result, expected) def test_basic_getitem_with_labels(self): @@ -912,7 +917,7 @@ def test_basic_setitem_with_labels(self): self.assertRaises(Exception, s.__setitem__, arr_inds_notfound, 0) def test_ix_getitem(self): - inds = self.series.index[[3,4,7]] + inds = self.series.index[[3, 4, 7]] assert_series_equal(self.series.ix[inds], self.series.reindex(inds)) assert_series_equal(self.series.ix[5::2], self.series[5::2]) @@ -972,14 +977,14 @@ def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 - rs = s.where(cond).dropna() + rs = s.where(cond).dropna() rs2 = s[cond] assert_series_equal(rs, rs2) - rs = s.where(cond,-s) + rs = s.where(cond, -s) assert_series_equal(rs, s.abs()) - rs = s.where(cond) + rs = s.where(cond) assert(s.shape == rs.shape) assert(rs is not s) @@ -1006,7 +1011,6 @@ def test_where_inplace(self): rs.where(cond, -s, inplace=True) assert_series_equal(rs, s.where(cond, -s)) - def test_mask(self): s = Series(np.random.randn(5)) cond = s > 0 @@ -1015,13 +1019,13 @@ def test_mask(self): assert_series_equal(rs, s.mask(~cond)) def test_ix_setitem(self): - inds = self.series.index[[3,4,7]] + inds = self.series.index[[3, 4, 7]] result = self.series.copy() result.ix[inds] = 5 expected = self.series.copy() - expected[[3,4,7]] = 5 + expected[[3, 4, 7]] = 5 assert_series_equal(result, expected) result.ix[5:10] = 10 @@ -1031,7 +1035,7 @@ def test_ix_setitem(self): # set slice with indices d1, d2 = self.series.index[[5, 15]] result.ix[d1:d2] = 6 - expected[5:16] = 6 # because it's inclusive + expected[5:16] = 6 # because it's inclusive assert_series_equal(result, expected) # set index value @@ -1118,18 +1122,18 @@ def test_repr(self): rep_str = repr(ser) self.assert_("Name: 0" in rep_str) - ser = Series(["a\n\r\tb"],name=["a\n\r\td"],index=["a\n\r\tf"]) + ser = Series(["a\n\r\tb"], name=["a\n\r\td"], index=["a\n\r\tf"]) self.assertFalse("\t" in repr(ser)) self.assertFalse("\r" in repr(ser)) self.assertFalse("a\n" in repr(ser)) def test_tidy_repr(self): - a=Series([u"\u05d0"]*1000) - a.name= 'title1' + a = Series([u"\u05d0"] * 1000) + a.name = 'title1' repr(a) # should not raise exception def test_repr_bool_fails(self): - s = 
Series([DataFrame(np.random.randn(2,2)) for i in range(5)]) + s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) import sys @@ -1152,7 +1156,7 @@ def test_repr_name_iterable_indexable(self): s.name = (u"\u05d0",) * 2 repr(s) - def test_repr_should_return_str (self): + def test_repr_should_return_str(self): """ http://docs.python.org/py3k/reference/datamodel.html#object.__repr__ http://docs.python.org/reference/datamodel.html#object.__repr__ @@ -1161,27 +1165,25 @@ def test_repr_should_return_str (self): (str on py2.x, str (unicode) on py3) """ - data=[8,5,3,5] - index1=[u"\u03c3",u"\u03c4",u"\u03c5",u"\u03c6"] - df=Series(data,index=index1) - self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 - + data = [8, 5, 3, 5] + index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] + df = Series(data, index=index1) + self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 def test_unicode_string_with_unicode(self): - df = Series([u"\u05d0"],name=u"\u05d1") + df = Series([u"\u05d0"], name=u"\u05d1") if py3compat.PY3: str(df) else: unicode(df) def test_bytestring_with_unicode(self): - df = Series([u"\u05d0"],name=u"\u05d1") + df = Series([u"\u05d0"], name=u"\u05d1") if py3compat.PY3: bytes(df) else: str(df) - def test_timeseries_repr_object_dtype(self): index = Index([datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object) @@ -1191,7 +1193,7 @@ def test_timeseries_repr_object_dtype(self): ts = tm.makeTimeSeries(1000) self.assert_(repr(ts).splitlines()[-1].startswith('Freq:')) - ts2 = ts.ix[np.random.randint(0, len(ts)-1, 400)] + ts2 = ts.ix[np.random.randint(0, len(ts) - 1, 400)] repr(ts).splitlines()[-1] def test_iter(self): @@ -1282,7 +1284,7 @@ def test_skew(self): _skip_if_no_scipy() from scipy.stats import skew - alt =lambda x: skew(x, bias=False) + alt = lambda x: skew(x, bias=False) self._check_stat_op('skew', alt) def test_kurt(self): @@ -1326,8 +1328,8 @@ def test_cummin(self): self.assert_(np.array_equal(self.ts.cummin(), np.minimum.accumulate(np.array(self.ts)))) ts = self.ts.copy() - ts[::2] = np.NaN - result = ts.cummin()[1::2] + ts[::2] = np.NaN + result = ts.cummin()[1::2] expected = np.minimum.accumulate(ts.valid()) self.assert_(np.array_equal(result, expected)) @@ -1336,8 +1338,8 @@ def test_cummax(self): self.assert_(np.array_equal(self.ts.cummax(), np.maximum.accumulate(np.array(self.ts)))) ts = self.ts.copy() - ts[::2] = np.NaN - result = ts.cummax()[1::2] + ts[::2] = np.NaN + result = ts.cummax()[1::2] expected = np.maximum.accumulate(ts.valid()) self.assert_(np.array_equal(result, expected)) @@ -1410,7 +1412,7 @@ def test_round(self): self.assertEqual(result.name, self.ts.name) def test_prod_numpy16_bug(self): - s = Series([1., 1., 1.] 
, index=range(3)) + s = Series([1., 1., 1.], index=range(3)) result = s.prod() self.assert_(not isinstance(result, Series)) @@ -1439,8 +1441,8 @@ def test_describe_percentiles(self): def test_describe_objects(self): s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a']) result = s.describe() - expected = Series({'count' : 7, 'unique' : 4, - 'top' : 'a', 'freq' : 3}, index=result.index) + expected = Series({'count': 7, 'unique': 4, + 'top': 'a', 'freq': 3}, index=result.index) assert_series_equal(result, expected) dt = list(self.ts.index) @@ -1449,10 +1451,10 @@ def test_describe_objects(self): rs = ser.describe() min_date = min(dt) max_date = max(dt) - xp = Series({'count' : len(dt), - 'unique' : len(self.ts.index), - 'first' : min_date, 'last' : max_date, 'freq' : 2, - 'top' : min_date}, index=rs.index) + xp = Series({'count': len(dt), + 'unique': len(self.ts.index), + 'first': min_date, 'last': max_date, 'freq': 2, + 'top': min_date}, index=rs.index) assert_series_equal(rs, xp) def test_describe_empty(self): @@ -1558,7 +1560,7 @@ def check_comparators(series, other): def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) - s2 = Series({'x' : 0.}) + s2 = Series({'x': 0.}) # it works! _ = s1 * s2 @@ -1575,7 +1577,7 @@ def test_operators_na_handling(self): from decimal import Decimal from datetime import date s = Series([Decimal('1.3'), Decimal('2.3')], - index=[date(2012,1,1), date(2012,1,2)]) + index=[date(2012, 1, 1), date(2012, 1, 2)]) result = s + s.shift(1) self.assert_(isnull(result[0])) @@ -1700,7 +1702,7 @@ def test_between(self): def test_setitem_na_exception(self): def testme1(): - s = Series([2,3,4,5,6,7,8,9,10]) + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) s[::2] = np.nan def testme2(): @@ -1716,19 +1718,19 @@ def testme3(): self.assertRaises(Exception, testme3) def test_scalar_na_cmp_corners(self): - s = Series([2,3,4,5,6,7,8,9,10]) + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) def tester(a, b): return a & b - self.assertRaises(ValueError, tester, s, datetime(2005,1,1)) + self.assertRaises(ValueError, tester, s, datetime(2005, 1, 1)) - s = Series([2,3,4,5,6,7,8,9,datetime(2005,1,1)]) + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan assert_series_equal(tester(s, list(s)), s) - d = DataFrame({'A':s}) + d = DataFrame({'A': s}) self.assertRaises(TypeError, tester, s, d) def test_idxmin(self): @@ -1826,9 +1828,9 @@ def test_series_frame_radd_bug(self): expected = vals.map(lambda x: 'foo_' + x) assert_series_equal(result, expected) - frame = DataFrame({'vals' : vals}) + frame = DataFrame({'vals': vals}) result = 'foo_' + frame - expected = DataFrame({'vals' : vals.map(lambda x: 'foo_' + x)}) + expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) tm.assert_frame_equal(result, expected) # really raise this time @@ -1841,7 +1843,7 @@ def test_operators_frame(self): sys.stderr = buf # rpow does not work with DataFrame try: - df = DataFrame({'A' : self.ts}) + df = DataFrame({'A': self.ts}) tm.assert_almost_equal(self.ts + self.ts, (self.ts + df)['A']) tm.assert_almost_equal(self.ts ** self.ts, (self.ts ** df)['A']) @@ -1990,12 +1992,12 @@ def test_corr_rank(self): raise nose.SkipTest # results from R - A = Series([-0.89926396, 0.94209606, -1.03289164, -0.95445587, + A = Series([-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310, -0.06430576, -2.09704447, 0.40660407, - -0.89926396, 0.94209606]) - B = Series([-1.01270225, -0.62210117, -1.56895827, 0.59592943, - -0.01680292, 1.17258718, -1.06009347, 
-0.10222060, - -0.89076239, 0.89372375]) + -0.89926396, 0.94209606]) + B = Series([-1.01270225, -0.62210117, -1.56895827, 0.59592943, + -0.01680292, 1.17258718, -1.06009347, -0.10222060, + -0.89076239, 0.89372375]) kexp = 0.4319297 sexp = 0.5853767 self.assertAlmostEqual(A.corr(B, method='kendall'), kexp) @@ -2003,10 +2005,11 @@ def test_corr_rank(self): def test_cov(self): # full overlap - self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std()**2) + self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std() ** 2) # partial overlap - self.assertAlmostEqual(self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std()**2) + self.assertAlmostEqual( + self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std() ** 2) # No overlap self.assert_(np.isnan(self.ts[::2].cov(self.ts[1::2]))) @@ -2135,7 +2138,6 @@ def test_sort_index(self): sorted_series = random_order.sort_index() assert_series_equal(sorted_series, self.ts) - # descending sorted_series = random_order.sort_index(ascending=False) assert_series_equal(sorted_series, @@ -2177,7 +2179,7 @@ def test_rank(self): assert_series_equal(ranks, oranks) - mask = np.isnan(self.ts) + mask = np.isnan(self.ts) filled = self.ts.fillna(np.inf) exp = rankdata(filled) @@ -2208,10 +2210,11 @@ def test_from_csv(self): outfile.write('1998-01-01|1.0\n1999-01-01|2.0') outfile.close() series = Series.from_csv(path, sep='|') - checkseries = Series({datetime(1998,1,1): 1.0, datetime(1999,1,1): 2.0}) + checkseries = Series( + {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}) assert_series_equal(checkseries, series) - series = Series.from_csv(path, sep='|',parse_dates=False) + series = Series.from_csv(path, sep='|', parse_dates=False) checkseries = Series({'1998-01-01': 1.0, '1999-01-01': 2.0}) assert_series_equal(checkseries, series) @@ -2230,8 +2233,8 @@ def test_to_csv(self): os.remove('_foo') def test_to_csv_unicode_index(self): - buf=StringIO() - s=Series([u"\u05d0","d2"], index=[u"\u05d0",u"\u05d1"]) + buf = StringIO() + s = Series([u"\u05d0", "d2"], index=[u"\u05d0", u"\u05d1"]) s.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -2245,7 +2248,7 @@ def test_tolist(self): xp = self.ts.values.tolist() assert_almost_equal(rs, xp) - #datetime64 + # datetime64 s = Series(self.ts.index) rs = s.tolist() xp = s.astype(object).values.tolist() @@ -2266,7 +2269,7 @@ def test_to_csv_float_format(self): os.remove(filename) def test_to_csv_list_entries(self): - s = Series(['jack and jill','jesse and frank']) + s = Series(['jack and jill', 'jesse and frank']) split = s.str.split(r'\s+and\s+') @@ -2297,16 +2300,18 @@ def test_valid(self): tm.assert_dict_equal(result, ts, compare_keys=False) def test_isnull(self): - ser = Series([0,5.4,3,nan,-0.001]) - assert_series_equal(ser.isnull(), Series([False,False,False,True,False])) - ser = Series(["hi","",nan]) - assert_series_equal(ser.isnull(), Series([False,False,True])) + ser = Series([0, 5.4, 3, nan, -0.001]) + assert_series_equal( + ser.isnull(), Series([False, False, False, True, False])) + ser = Series(["hi", "", nan]) + assert_series_equal(ser.isnull(), Series([False, False, True])) def test_notnull(self): - ser = Series([0,5.4,3,nan,-0.001]) - assert_series_equal(ser.notnull(), Series([True,True,True,False,True])) - ser = Series(["hi","",nan]) - assert_series_equal(ser.notnull(), Series([True,True,False])) + ser = Series([0, 5.4, 3, nan, -0.001]) + assert_series_equal( + ser.notnull(), Series([True, True, True, False, True])) + ser = Series(["hi", "", nan]) + assert_series_equal(ser.notnull(), Series([True, True, False])) def 
test_shift(self): shifted = self.ts.shift(1) @@ -2561,7 +2566,7 @@ def test_astype_cast_nan_int(self): self.assertRaises(ValueError, df.astype, np.int64) def test_astype_cast_object_int(self): - arr = Series(["car", "house", "tree","1"]) + arr = Series(["car", "house", "tree", "1"]) self.assertRaises(ValueError, arr.astype, int) self.assertRaises(ValueError, arr.astype, np.int64) @@ -2593,8 +2598,8 @@ def test_map(self): self.assert_(np.array_equal(result, self.ts * 2)) def test_map_int(self): - left = Series({'a' : 1., 'b' : 2., 'c' : 3., 'd' : 4}) - right = Series({1 : 11, 2 : 22, 3 : 33}) + left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4}) + right = Series({1: 11, 2: 22, 3: 33}) self.assert_(left.dtype == np.float_) self.assert_(issubclass(right.dtype.type, np.integer)) @@ -2633,7 +2638,7 @@ def test_apply(self): # how to handle Series result, #2316 result = self.ts.apply(lambda x: Series([x, x ** 2], index=['x', 'x^2'])) - expected = DataFrame({'x': self.ts, 'x^2': self.ts **2}) + expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) tm.assert_frame_equal(result, expected) def test_apply_same_length_inference_bug(self): @@ -2934,7 +2939,7 @@ def test_rename(self): # partial dict s = Series(np.arange(4), index=['a', 'b', 'c', 'd']) - renamed = s.rename({'b' : 'foo', 'd' : 'bar'}) + renamed = s.rename({'b': 'foo', 'd': 'bar'}) self.assert_(np.array_equal(renamed.index, ['a', 'foo', 'c', 'bar'])) def test_rename_inplace(self): @@ -2946,7 +2951,7 @@ def test_rename_inplace(self): self.assertEqual(self.ts.index[0], expected) def test_preserveRefs(self): - seq = self.ts[[5,10,15]] + seq = self.ts[[5, 10, 15]] seq[1] = np.NaN self.assertFalse(np.isnan(self.ts[10])) @@ -2964,7 +2969,7 @@ def test_pad_nan(self): self.assertTrue(res is None) expected = TimeSeries([np.nan, 1.0, 1.0, 3.0, 3.0], - ['z', 'a', 'b', 'c', 'd'], dtype=float) + ['z', 'a', 'b', 'c', 'd'], dtype=float) assert_series_equal(x[1:], expected[1:]) self.assert_(np.isnan(x[0]), np.isnan(expected[0])) @@ -2995,7 +3000,7 @@ def test_unstack(self): exp_index = MultiIndex(levels=[['one', 'two', 'three'], [0, 1]], labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) - expected = DataFrame({'bar' : s.values}, index=exp_index).sortlevel(0) + expected = DataFrame({'bar': s.values}, index=exp_index).sortlevel(0) unstacked = s.unstack(0) assert_frame_equal(unstacked, expected) @@ -3025,7 +3030,8 @@ def test_fillna(self): ts[2] = np.NaN - self.assert_(np.array_equal(ts.fillna(method='ffill'), [0., 1., 1., 3., 4.])) + self.assert_( + np.array_equal(ts.fillna(method='ffill'), [0., 1., 1., 3., 4.])) self.assert_(np.array_equal(ts.fillna(method='backfill'), [0., 1., 3., 3., 4.])) @@ -3035,7 +3041,7 @@ def test_fillna(self): self.assertRaises(ValueError, self.ts.fillna, value=0, method='ffill') def test_fillna_bug(self): - x = Series([nan, 1., nan, 3., nan],['z','a','b','c','d']) + x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd']) filled = x.fillna(method='ffill') expected = Series([nan, 1., 1., 3., 3.], x.index) assert_series_equal(filled, expected) @@ -3045,7 +3051,7 @@ def test_fillna_bug(self): assert_series_equal(filled, expected) def test_fillna_inplace(self): - x = Series([nan, 1., nan, 3., nan],['z','a','b','c','d']) + x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd']) y = x.copy() res = y.fillna(value=0, inplace=True) @@ -3102,7 +3108,7 @@ def test_replace(self): self.assert_((isnull(ser[:5])).all()) # replace with different values - rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + rs = 
ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) self.assert_((rs[:5] == -1).all()) self.assert_((rs[6:10] == -2).all()) @@ -3145,8 +3151,8 @@ def test_replace(self): assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) # malformed - self.assertRaises(ValueError, ser.replace, [1,2,3], [np.nan, 0]) - self.assertRaises(ValueError, ser.replace, xrange(1,3), [np.nan, 0]) + self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) + self.assertRaises(ValueError, ser.replace, xrange(1, 3), [np.nan, 0]) ser = Series([0, 1, 2, 3, 4]) result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) @@ -3225,7 +3231,7 @@ def test_diff(self): rs = s.diff() self.assertEqual(rs[1], 1) - #neg n + # neg n rs = self.ts.diff(-1) xp = self.ts - self.ts.shift(-1) assert_series_equal(rs, xp) @@ -3250,7 +3256,7 @@ def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) chg = s.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5/1.5 -1, .2]) + expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) def test_autocorr(self): @@ -3287,7 +3293,7 @@ def test_mpl_compat_hack(self): def test_select(self): n = len(self.ts) result = self.ts.select(lambda x: x >= self.ts.index[n // 2]) - expected = self.ts.reindex(self.ts.index[n//2:]) + expected = self.ts.reindex(self.ts.index[n // 2:]) assert_series_equal(result, expected) result = self.ts.select(lambda x: x.weekday() == 2) @@ -3306,6 +3312,7 @@ def test_numpy_unique(self): # it works! result = np.unique(self.ts) + class TestSeriesNonUnique(unittest.TestCase): _multiprocess_can_split_ = True @@ -3378,14 +3385,14 @@ def test_reset_index(self): df = ser.reset_index(name='value2') self.assert_('value2' in df) - #check inplace + # check inplace s = ser.reset_index(drop=True) s2 = ser res = s2.reset_index(drop=True, inplace=True) self.assertTrue(res is None) assert_series_equal(s, s2) - #level + # level index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], @@ -3429,7 +3436,7 @@ def test_replace(self): self.assert_((isnull(ser[:5])).all()) # replace with different values - rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) self.assert_((rs[:5] == -1).all()) self.assert_((rs[6:10] == -2).all()) @@ -3471,9 +3478,9 @@ def test_repeat(self): def test_unique_data_ownership(self): # it works! 
#1807 - Series(Series(["a","c","b"]).unique()).sort() + Series(Series(["a", "c", "b"]).unique()).sort() if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index ba37355afec7a..0432d11aaa254 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -11,6 +11,7 @@ assert_series_equal, assert_almost_equal) + class TestRank(unittest.TestCase): _multiprocess_can_split_ = True s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) @@ -113,5 +114,5 @@ def test_rank_int(self): if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 9d803b3fd662a..910bfda73abe5 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -20,6 +20,7 @@ import pandas.core.strings as strings + class TestStringMethods(unittest.TestCase): _multiprocess_can_split_ = True @@ -74,7 +75,7 @@ def test_count(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = [u'foo', u'foofoo', NA, u'foooofooofommmfoo'] result = strings.str_count(values, 'f[o]+') @@ -99,7 +100,7 @@ def test_contains(self): self.assert_(result.dtype == np.bool_) tm.assert_almost_equal(result, expected) - #mixed + # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] rs = strings.str_contains(mixed, 'o') xp = [False, NA, False, NA, NA, True, NA, NA, NA] @@ -109,7 +110,7 @@ def test_contains(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = [u'foo', NA, u'fooommm__foo', u'mmm_'] pat = 'mmm[_]+' @@ -134,7 +135,7 @@ def test_startswith(self): exp = Series([False, NA, True, False, False, NA, True]) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] rs = strings.str_startswith(mixed, 'f') xp = [False, NA, False, NA, NA, True, NA, NA, NA] @@ -144,7 +145,7 @@ def test_startswith(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, u'foo']) @@ -162,7 +163,7 @@ def test_endswith(self): exp = Series([False, NA, False, False, True, NA, True]) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] 
rs = strings.str_endswith(mixed, 'f') xp = [False, NA, False, NA, NA, False, NA, NA, NA] @@ -172,7 +173,7 @@ def test_endswith(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, u'foo']) @@ -193,7 +194,7 @@ def test_lower_upper(self): result = result.str.lower() tm.assert_series_equal(result, values) - #mixed + # mixed mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.]) mixed = mixed.str.upper() @@ -202,7 +203,7 @@ def test_lower_upper(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'om', NA, u'nom', u'nom']) result = values.str.upper() @@ -223,7 +224,7 @@ def test_replace(self): exp = Series(['foobarBAD', NA]) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD', None, 1, 2.]) @@ -232,7 +233,7 @@ def test_replace(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'fooBAD__barBAD', NA]) result = values.str.replace('BAD[_]*', '') @@ -254,7 +255,7 @@ def test_repeat(self): exp = Series(['a', 'bb', NA, 'cccc', NA, 'dddddd']) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.]) @@ -263,7 +264,7 @@ def test_repeat(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a', u'b', NA, u'c', NA, u'd']) result = values.str.repeat(3) @@ -274,7 +275,6 @@ def test_repeat(self): exp = Series([u'a', u'bb', NA, u'cccc', NA, u'dddddd']) tm.assert_series_equal(result, exp) - def test_match(self): values = Series(['fooBAD__barBAD', NA, 'foo']) @@ -282,7 +282,7 @@ def test_match(self): exp = Series([('BAD__', 'BAD'), NA, []]) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.]) @@ -291,7 +291,7 @@ def test_match(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'fooBAD__barBAD', NA, u'foo']) result = values.str.match('.*(BAD[_]+).*(BAD)') @@ -303,7 +303,7 @@ def test_join(self): result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) - #mixed + # mixed mixed = Series(['a_b', NA, 'asdf_cas_asdf', True, datetime.today(), 'foo', None, 1, 2.]) @@ -313,7 +313,7 @@ def test_join(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) @@ -325,7 +325,7 @@ def test_len(self): exp = values.map(lambda x: len(x) if com.notnull(x) else NA) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['a_b', NA, 'asdf_cas_asdf', True, datetime.today(), 'foo', None, 1, 2.]) @@ -335,7 +335,7 @@ def test_len(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'foo', u'fooo', u'fooooo', np.nan, u'fooooooo']) result = values.str.len() @@ -349,7 +349,7 @@ def test_findall(self): exp = Series([['BAD__', 'BAD'], NA, [], ['BAD']]) tm.assert_almost_equal(result, exp) - #mixed + # mixed mixed = Series(['fooBAD__barBAD', NA, 'foo', True, datetime.today(), 'BAD', None, 1, 2.]) @@ -359,7 +359,7 @@ def test_findall(self): 
self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'fooBAD__barBAD', NA, u'foo', u'BAD']) result = values.str.findall('BAD[_]*') @@ -381,7 +381,7 @@ def test_pad(self): exp = Series([' a ', ' b ', NA, ' c ', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - #mixed + # mixed mixed = Series(['a', NA, 'b', True, datetime.today(), 'ee', None, 1, 2.]) @@ -409,7 +409,7 @@ def test_pad(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) result = values.str.pad(5, side='left') @@ -431,7 +431,7 @@ def test_center(self): exp = Series([' a ', ' b ', NA, ' c ', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - #mixed + # mixed mixed = Series(['a', NA, 'b', True, datetime.today(), 'c', 'eee', None, 1, 2.]) @@ -442,7 +442,7 @@ def test_center(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) result = values.str.center(5) @@ -456,12 +456,12 @@ def test_split(self): exp = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']]) tm.assert_series_equal(result, exp) - #more than one char + # more than one char values = Series(['a__b__c', 'c__d__e', NA, 'f__g__h']) result = values.str.split('__') tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), None, 1, 2.]) @@ -472,7 +472,7 @@ def test_split(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a_b_c', u'c_d_e', NA, u'f_g_h']) result = values.str.split('_') @@ -488,7 +488,7 @@ def test_split_noargs(self): self.assertEquals(result[1], ['Travis', 'Oliphant']) def test_split_maxsplit(self): - #re.split 0, str.split -1 + # re.split 0, str.split -1 s = Series(['bd asdf jfg', 'kjasdflqw asdfnfk']) result = s.str.split(n=-1) @@ -520,13 +520,13 @@ def test_pipe_failures(self): tm.assert_series_equal(result, exp) def test_slice(self): - values = Series(['aafootwo','aabartwo', NA, 'aabazqux']) + values = Series(['aafootwo', 'aabartwo', NA, 'aabazqux']) result = values.str.slice(2, 5) exp = Series(['foo', 'bar', NA, 'baz']) tm.assert_series_equal(result, exp) - #mixed + # mixed mixed = Series(['aafootwo', NA, 'aabartwo', True, datetime.today(), None, 1, 2.]) @@ -537,7 +537,7 @@ def test_slice(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'aafootwo', u'aabartwo', NA, u'aabazqux']) result = values.str.slice(2, 5) @@ -563,7 +563,7 @@ def test_strip_lstrip_rstrip(self): tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip_mixed(self): - #mixed + # mixed mixed = Series([' aa ', NA, ' bb \t\n', True, datetime.today(), None, 1, 2.]) @@ -589,7 +589,7 @@ def test_strip_lstrip_rstrip_mixed(self): tm.assert_almost_equal(rs, xp) def test_strip_lstrip_rstrip_unicode(self): - #unicode + # unicode values = Series([u' aa ', u' bb \n', NA, u'cc ']) result = values.str.strip() @@ -644,7 +644,7 @@ def test_get(self): expected = Series(['b', 'd', np.nan, 'g']) tm.assert_series_equal(result, expected) - #mixed + # mixed mixed = Series(['a_b_c', NA, 'c_d_e', True, datetime.today(), None, 1, 2.]) @@ -655,7 +655,7 @@ def test_get(self): self.assert_(isinstance(rs, Series)) tm.assert_almost_equal(rs, xp) - #unicode + # unicode values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) result = 
values.str.split('_').str.get(1) @@ -711,7 +711,7 @@ def test_more_replace(self): assert_series_equal(result, expected) result = s.str.replace('^.a|dog', 'XX-XX ', case=False) - expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA, + expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA, 'XX-XX BA', 'XX-XX ', 'XX-XX t']) assert_series_equal(result, expected) @@ -779,5 +779,5 @@ def test_encode_decode_errors(self): tm.assert_series_equal(result, exp) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 3492ca038097d..7e5341fd5b311 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -9,8 +9,10 @@ import pandas.algos as algos from datetime import datetime + class TestTseriesUtil(unittest.TestCase): _multiprocess_can_split_ = True + def test_combineFunc(self): pass @@ -59,6 +61,7 @@ def test_pad(self): expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) + def test_left_join_indexer_unique(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) @@ -67,6 +70,7 @@ def test_left_join_indexer_unique(): expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) assert(np.array_equal(result, expected)) + def test_left_outer_join_bug(): left = np.array([0, 1, 0, 1, 1, 2, 3, 1, 0, 2, 1, 2, 0, 1, 1, 2, 3, 2, 3, 2, 1, 1, 3, 0, 3, 2, 3, 0, 0, 2, 3, 2, 0, 3, 1, 3, 0, 1, @@ -88,6 +92,7 @@ def test_left_outer_join_bug(): assert(np.array_equal(lidx, exp_lidx)) assert(np.array_equal(ridx, exp_ridx)) + def test_inner_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) @@ -110,6 +115,7 @@ def test_inner_join_indexer(): assert_almost_equal(ares, [0]) assert_almost_equal(bres, [0]) + def test_outer_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) @@ -132,6 +138,7 @@ def test_outer_join_indexer(): assert_almost_equal(ares, [0]) assert_almost_equal(bres, [0]) + def test_left_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) @@ -153,9 +160,10 @@ def test_left_join_indexer(): assert_almost_equal(ares, [0]) assert_almost_equal(bres, [0]) + def test_left_join_indexer2(): - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,7,9]) + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = algos.left_join_indexer_int64(idx2, idx) @@ -168,9 +176,10 @@ def test_left_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) + def test_outer_join_indexer2(): - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,7,9]) + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = algos.outer_join_indexer_int64(idx2, idx) @@ -183,9 +192,10 @@ def test_outer_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) + def test_inner_join_indexer2(): - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,7,9]) + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = algos.inner_join_indexer_int64(idx2, idx) @@ -203,21 +213,21 @@ def test_is_lexsorted(): failure = [ np.array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0]), + 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0]), np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, - 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, - 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, - 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, - 4, 3, 2, 1, 0])] + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, + 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, + 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, + 4, 3, 2, 1, 0])] assert(not algos.is_lexsorted(failure)) @@ -230,6 +240,7 @@ def test_is_lexsorted(): # assert(np.array_equal(result, expected)) + def test_groupsort_indexer(): a = np.random.randint(0, 1000, 100).astype(np.int64) b = np.random.randint(0, 1000, 100).astype(np.int64) @@ -246,12 +257,14 @@ def test_groupsort_indexer(): expected = np.lexsort((b, a)) assert(np.array_equal(result, expected)) + def test_ensure_platform_int(): arr = np.arange(100) result = algos.ensure_platform_int(arr) assert(result is arr) + def test_duplicated_with_nas(): keys = np.array([0, 1, nan, 0, 2, nan], dtype=object) @@ -264,7 +277,7 @@ def test_duplicated_with_nas(): assert(np.array_equal(result, expected)) keys = np.empty(8, dtype=object) - for i, t in enumerate(zip([0, 0, nan, nan]*2, [0, nan, 0, nan]*2)): + for i, t in enumerate(zip([0, 0, nan, nan] * 2, [0, nan, 0, nan] * 2)): keys[i] = t result = lib.duplicated(keys) @@ -277,6 +290,7 @@ def test_duplicated_with_nas(): expected = trues + falses assert(np.array_equal(result, expected)) + def test_maybe_booleans_to_slice(): arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) result = lib.maybe_booleans_to_slice(arr) @@ -285,11 +299,13 @@ def test_maybe_booleans_to_slice(): result = lib.maybe_booleans_to_slice(arr[:0]) assert(result == slice(0, 0)) + def test_convert_objects(): arr = np.array(['a', 'b', nan, nan, 'd', 'e', 'f'], dtype='O') result = lib.maybe_convert_objects(arr) assert(result.dtype == np.object_) + def test_convert_infs(): arr = np.array(['inf', 'inf', 'inf'], dtype='O') result = lib.maybe_convert_numeric(arr, set(), False) @@ -310,6 +326,7 @@ def test_convert_objects_ints(): result = lib.maybe_convert_objects(arr) assert(issubclass(result.dtype.type, np.integer)) + def test_convert_objects_complex_number(): for dtype in np.sctypes['complex']: arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') @@ -317,6 +334,7 @@ def test_convert_objects_complex_number(): result = lib.maybe_convert_objects(arr) assert(issubclass(result.dtype.type, np.complexfloating)) + def test_rank(): from pandas.compat.scipy 
import rankdata @@ -332,12 +350,14 @@ def _check(arr): _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan])) _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan])) + def test_get_reverse_indexer(): indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) assert(np.array_equal(result, expected)) + def test_pad_backfill_object_segfault(): from datetime import datetime old = np.array([], dtype='O') @@ -359,11 +379,13 @@ def test_pad_backfill_object_segfault(): expected = np.array([], dtype=np.int64) assert(np.array_equal(result, expected)) + def test_arrmap(): values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') result = algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) assert(result.dtype == np.bool_) + def test_series_grouper(): from pandas import Series obj = Series(np.random.randn(10)) @@ -380,6 +402,7 @@ def test_series_grouper(): exp_counts = np.array([3, 4], dtype=np.int64) assert_almost_equal(counts, exp_counts) + def test_series_bin_grouper(): from pandas import Series obj = Series(np.random.randn(10)) @@ -396,8 +419,10 @@ def test_series_bin_grouper(): exp_counts = np.array([3, 3, 4], dtype=np.int64) assert_almost_equal(counts, exp_counts) + class TestBinGroupers(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.obj = np.random.randn(10, 1) self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) @@ -405,8 +430,8 @@ def setUp(self): def test_generate_bins(self): from pandas.core.groupby import generate_bins_generic - values = np.array([1,2,3,4,5,6], dtype=np.int64) - binner = np.array([0,3,6,9], dtype=np.int64) + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6, 9], dtype=np.int64) for func in [lib.generate_bins_dt64, generate_bins_generic]: bins = func(values, binner, closed='left') @@ -416,8 +441,8 @@ def test_generate_bins(self): assert((bins == np.array([3, 6, 6])).all()) for func in [lib.generate_bins_dt64, generate_bins_generic]: - values = np.array([1,2,3,4,5,6], dtype=np.int64) - binner = np.array([0,3,6], dtype=np.int64) + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6], dtype=np.int64) bins = func(values, binner, closed='right') assert((bins == np.array([3, 6])).all()) @@ -441,7 +466,7 @@ def test_group_bin_functions(self): 'prod': np.prod, 'min': np.min, 'max': np.max, - 'var': lambda x: x.var(ddof=1) if len(x) >=2 else np.nan + 'var': lambda x: x.var(ddof=1) if len(x) >= 2 else np.nan } for fname in funcs: @@ -459,14 +484,14 @@ def _check_versions(self, irr_func, bin_func, np_func): # bin-based version bins = np.array([3, 6], dtype=np.int64) - out = np.zeros((3, 1), np.float64) + out = np.zeros((3, 1), np.float64) counts = np.zeros(len(out), dtype=np.int64) bin_func(out, counts, obj, bins) assert_almost_equal(out, exp) bins = np.array([3, 9, 10], dtype=np.int64) - out = np.zeros((3, 1), np.float64) + out = np.zeros((3, 1), np.float64) counts = np.zeros(len(out), dtype=np.int64) bin_func(out, counts, obj, bins) exp = np.array([np_func(obj[:3]), np_func(obj[3:9]), @@ -476,7 +501,7 @@ def _check_versions(self, irr_func, bin_func, np_func): # duplicate bins bins = np.array([3, 6, 10, 10], dtype=np.int64) - out = np.zeros((4, 1), np.float64) + out = np.zeros((4, 1), np.float64) counts = np.zeros(len(out), dtype=np.int64) bin_func(out, counts, obj, bins) exp = np.array([np_func(obj[:3]), np_func(obj[3:6]), @@ -489,7 +514,7 @@ 
def test_group_ohlc(): obj = np.random.randn(20) bins = np.array([6, 12], dtype=np.int64) - out = np.zeros((3, 4), np.float64) + out = np.zeros((3, 4), np.float64) counts = np.zeros(len(out), dtype=np.int64) algos.group_ohlc(out, counts, obj[:, None], bins) @@ -510,6 +535,7 @@ def _ohlc(group): expected[0] = nan assert_almost_equal(out, expected) + def test_try_parse_dates(): from dateutil.parser import parse @@ -522,6 +548,7 @@ def test_try_parse_dates(): class TestTypeInference(unittest.TestCase): _multiprocess_can_split_ = True + def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) self.assertEqual(result, 'empty') @@ -597,7 +624,7 @@ def test_date(self): self.assert_(index.inferred_type == 'date') def test_to_object_array_tuples(self): - r = (5,6) + r = (5, 6) values = [r] result = lib.to_object_array_tuples(values) @@ -632,7 +659,8 @@ def test_int_index(self): assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(100)) - result = lib.reduce(arr, np.sum, dummy=dummy, labels=Index(np.arange(4))) + result = lib.reduce( + arr, np.sum, dummy=dummy, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) @@ -644,5 +672,5 @@ def test_int_index(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index e2086fc84de2c..072acb1947c1f 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -34,7 +34,8 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy) return op.get_result() -if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' +if __debug__: + merge.__doc__ = _merge_doc % '\nleft : DataFrame' class MergeError(Exception): @@ -144,11 +145,8 @@ def _merger(x, y): return _merger(left, right) - # TODO: transformations?? # TODO: only copy DataFrames when modification necessary - - class _MergeOperation(object): """ Perform a database (SQL) merge operation between two DataFrame objects @@ -216,8 +214,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): continue right_na_indexer = right_indexer.take(na_indexer) - key_col.put(na_indexer, com.take_1d(self.right_join_keys[i], - right_na_indexer)) + key_col.put( + na_indexer, com.take_1d(self.right_join_keys[i], + right_na_indexer)) elif name in self.right and right_indexer is not None: na_indexer = (right_indexer == -1).nonzero()[0] if len(na_indexer) == 0: @@ -368,7 +367,7 @@ def _get_merge_keys(self): def _validate_specification(self): # Hm, any way to make this logic less complicated?? 
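
The validation below resolves merge keys with a fixed precedence: an explicit `on` wins, otherwise `left_on`/`right_on`, otherwise the index flags, otherwise the columns common to both frames. A minimal sketch of that precedence from the caller's side, illustrative only and not part of the patch; `left` and `right` are hypothetical throwaway frames:

    import numpy as np
    from pandas import DataFrame, merge

    left = DataFrame({'key': ['a', 'b', 'c'], 'v1': np.arange(3)})
    right = DataFrame({'key': ['b', 'c', 'd'], 'v2': np.arange(3)})

    merge(left, right, on='key')                       # explicit shared key
    merge(left, right, left_on='key', right_on='key')  # per-side key names
    merge(left, right)                  # falls back to the common column 'key'
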
if (self.on is None and self.left_on is None - and self.right_on is None): + and self.right_on is None): if self.left_index and self.right_index: self.left_on, self.right_on = (), () @@ -380,14 +379,15 @@ def _validate_specification(self): raise MergeError('Must pass left_on or left_index=True') else: # use the common columns - common_cols = self.left.columns.intersection(self.right.columns) + common_cols = self.left.columns.intersection( + self.right.columns) if len(common_cols) == 0: raise MergeError('No common columns to perform merge on') self.left_on = self.right_on = common_cols elif self.on is not None: if self.left_on is not None or self.right_on is not None: raise MergeError('Can only pass on OR left_on and ' - 'right_on') + 'right_on') self.left_on = self.right_on = self.on elif self.left_on is not None: n = len(self.left_on) @@ -578,8 +578,10 @@ def _factorize_keys(lk, rk, sort=True): llab, rlab = _sort_labels(uniques, llab, rlab) # NA group - lmask = llab == -1; lany = lmask.any() - rmask = rlab == -1; rany = rmask.any() + lmask = llab == -1 + lany = lmask.any() + rmask = rlab == -1 + rany = rmask.any() if lany or rany: if lany: @@ -701,7 +703,7 @@ def _merge_blocks(self, merge_chunks): sofar = 0 for unit, blk in merge_chunks: - out_chunk = out[sofar : sofar + len(blk)] + out_chunk = out[sofar: sofar + len(blk)] if unit.indexer is None: # is this really faster than assigning to arr.flat? @@ -1038,7 +1040,7 @@ def _concat_blocks(self, blocks): return make_block(concat_values, blocks[0].items, self.new_axes[0]) else: offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for - x in self.objs])] + x in self.objs])] indexer = np.concatenate([offsets[i] + b.ref_locs for i, b in enumerate(blocks) if b is not None]) @@ -1176,7 +1178,7 @@ def _concat_indexes(indexes): def _make_concat_multiindex(indexes, keys, levels=None, names=None): if ((levels is None and isinstance(keys[0], tuple)) or - (levels is not None and len(levels) > 1)): + (levels is not None and len(levels) > 1)): zipped = zip(*keys) if names is None: names = [None] * len(zipped) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 9fd61f7439c02..54258edd09cb4 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -19,10 +19,11 @@ try: # mpl optional import pandas.tseries.converter as conv - conv.register() # needs to override so set_xlim works with str/number + conv.register() # needs to override so set_xlim works with str/number except ImportError: pass + def _get_standard_kind(kind): return {'density': 'kde'}.get(kind, kind) @@ -35,8 +36,8 @@ class _Options(dict): format that makes it easy to breakdown into groups later """ - #alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat' : 'xaxis.compat'} + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} _DEFAULT_KEYS = ['xaxis.compat'] def __init__(self): @@ -91,6 +92,7 @@ def use(self, key, value): plot_params = _Options() + def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', **kwds): """ @@ -147,7 +149,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, common = (mask[a] & mask[b]).values ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) + marker=marker, alpha=alpha, **kwds) ax.set_xlabel('') ax.set_ylabel('') @@ -321,7 +323,7 @@ def f(x): harmonic = 1.0 for x_even, x_odd in zip(amplitudes[1::2], amplitudes[2::2]): result += (x_even * sin(harmonic * 
x) + - x_odd * cos(harmonic * x)) + x_odd * cos(harmonic * x)) harmonic += 1.0 if len(amplitudes) % 2 != 0: result += amplitudes[-1] * sin(harmonic * x) @@ -337,7 +339,7 @@ def random_color(column): columns = [data[col] for col in data.columns if (col != class_column)] x = [-pi + 2.0 * pi * (t / float(samples)) for t in range(samples)] used_legends = set([]) - if ax == None: + if ax is None: ax = plt.gca(xlim=(-pi, pi)) for i in range(n): row = [columns[c][i] for c in range(len(columns))] @@ -381,7 +383,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): medians = np.array([np.median(sampling) for sampling in samplings]) midranges = np.array([(min(sampling) + max(sampling)) * 0.5 for sampling in samplings]) - if fig == None: + if fig is None: fig = plt.figure() x = range(samples) axes = [] @@ -484,7 +486,7 @@ def random_color(column): else: x = range(ncols) - if ax == None: + if ax is None: ax = plt.gca() # if user has not specified colors to use, choose at random @@ -535,7 +537,7 @@ def lag_plot(series, ax=None, **kwds): data = series.values y1 = data[:-1] y2 = data[1:] - if ax == None: + if ax is None: ax = plt.gca() ax.set_xlabel("y(t)") ax.set_ylabel("y(t + 1)") @@ -558,7 +560,7 @@ def autocorrelation_plot(series, ax=None): import matplotlib.pyplot as plt n = len(series) data = np.asarray(series) - if ax == None: + if ax is None: ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) mean = np.mean(data) c0 = np.sum((data - mean) ** 2) / float(n) @@ -615,6 +617,7 @@ def plot_group(group, ax): hspace=0.5, wspace=0.3) return axes + class MPLPlot(object): """ Base class for assembling a pandas plot using matplotlib @@ -691,11 +694,10 @@ def _validate_color_args(self): if ('color' in self.kwds and (isinstance(self.data, Series) or - isinstance(self.data, DataFrame) and len(self.data.columns) ==1 )): - #support series.plot(color='green') + isinstance(self.data, DataFrame) and len(self.data.columns) == 1)): + # support series.plot(color='green') self.kwds['color'] = [self.kwds['color']] - def _iter_data(self): from pandas.core.frame import DataFrame if isinstance(self.data, (Series, np.ndarray)): @@ -940,7 +942,7 @@ def on_right(self, i): return self.secondary_y if (isinstance(self.data, DataFrame) and - isinstance(self.secondary_y, (tuple, list, np.ndarray))): + isinstance(self.secondary_y, (tuple, list, np.ndarray))): return self.data.columns[i] in self.secondary_y def _get_style(self, i, col_name): @@ -978,7 +980,7 @@ def _make_plot(self): gkde = gaussian_kde(y) sample_range = max(y) - min(y) ind = np.linspace(min(y) - 0.5 * sample_range, - max(y) + 0.5 * sample_range, 1000) + max(y) + 0.5 * sample_range, 1000) ax.set_ylabel("Density") y = gkde.evaluate(ind) @@ -1005,7 +1007,7 @@ def __init__(self, data, **kwargs): MPLPlot.__init__(self, data, **kwargs) self.x_compat = plot_params['x_compat'] if 'x_compat' in self.kwds: - self.x_compat = bool(self.kwds.pop('x_compat')) + self.x_compat = bool(self.kwds.pop('x_compat')) def _index_freq(self): from pandas.core.frame import DataFrame @@ -1074,7 +1076,7 @@ def _make_plot(self): kwds = self.kwds.copy() self._maybe_add_color(colors, kwds, style, i) - label = com.pprint_thing(label) # .encode('utf-8') + label = com.pprint_thing(label) # .encode('utf-8') mask = com.isnull(y) if mask.any(): @@ -1178,7 +1180,7 @@ def _maybe_convert_index(self, data): # over and over for DataFrames from pandas.core.frame import DataFrame if (isinstance(data.index, DatetimeIndex) and - isinstance(data, DataFrame)): + isinstance(data, DataFrame)): freq = 
getattr(data.index, 'freq', None) if freq is None: @@ -1280,7 +1282,8 @@ def _make_plot(self): if self.subplots: ax = self._get_ax(i) # self.axes[i] - rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=pos_prior, **kwds) + rect = bar_f(ax, self.ax_pos, y, + self.bar_width, start=pos_prior, **kwds) ax.set_title(label) elif self.stacked: mask = y > 0 @@ -1296,7 +1299,7 @@ def _make_plot(self): labels.append(label) if self.legend and not self.subplots: - patches =[r[0] for r in rects] + patches = [r[0] for r in rects] self.axes[0].legend(patches, labels, loc='best', title=self.legend_title) @@ -1323,7 +1326,7 @@ def _post_plot_logic(self): if name is not None: ax.set_ylabel(name) - #if self.subplots and self.legend: + # if self.subplots and self.legend: # self.axes[0].legend(loc='best') @@ -1442,6 +1445,7 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True, else: return plot_obj.axes[0] + def plot_series(series, label=None, kind='line', use_index=True, rot=None, xticks=None, yticks=None, xlim=None, ylim=None, ax=None, style=None, grid=None, legend=False, logx=False, @@ -1556,7 +1560,7 @@ def plot_group(grouped, ax): else: ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize) - if column == None: + if column is None: columns = None else: if isinstance(column, (list, tuple)): @@ -1645,9 +1649,10 @@ def plot_group(group, ax): return fig -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, - ylabelsize=None, yrot=None, ax=None, - sharex=False, sharey=False, **kwds): +def hist_frame( + data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None, ax=None, + sharex=False, sharey=False, **kwds): """ Draw Histogram the DataFrame's series using matplotlib / pylab. @@ -1726,6 +1731,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None return axes + def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, **kwds): """ @@ -2068,7 +2074,7 @@ def on_right(i): # Note off-by-one counting because add_subplot uses the MATLAB 1-based # convention. 
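
For reference, matplotlib's `add_subplot` really does count MATLAB-style from 1, which is why the loop below passes `i + 1`. A standalone sketch of the same pattern, illustrative only and not part of the patch; the `nrows`/`ncols` values are arbitrary:

    import matplotlib.pyplot as plt

    fig = plt.figure()
    nrows, ncols = 2, 2
    for i in range(nrows * ncols):
        # add_subplot indexes from 1 (MATLAB convention), hence i + 1
        fig.add_subplot(nrows, ncols, i + 1)
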
for i in range(1, nplots): - ax = fig.add_subplot(nrows, ncols, i+1, **subplot_kw) + ax = fig.add_subplot(nrows, ncols, i + 1, **subplot_kw) if on_right(i): orig_ax = ax ax = ax.twinx() @@ -2079,11 +2085,13 @@ def on_right(i): if sharex and nrows > 1: for i, ax in enumerate(axarr): if np.ceil(float(i + 1) / ncols) < nrows: # only last row - [label.set_visible(False) for label in ax.get_xticklabels()] + [label.set_visible( + False) for label in ax.get_xticklabels()] if sharey and ncols > 1: for i, ax in enumerate(axarr): if (i % ncols) != 0: # only first column - [label.set_visible(False) for label in ax.get_yticklabels()] + [label.set_visible( + False) for label in ax.get_yticklabels()] if squeeze: # Reshape the array to have the final desired dimension (nrow,ncol), @@ -2099,6 +2107,7 @@ def on_right(i): return fig, axes + def _get_xlim(lines): import pandas.tseries.converter as conv left, right = np.inf, -np.inf @@ -2108,6 +2117,7 @@ def _get_xlim(lines): right = max(_maybe_convert_date(x[-1]), right) return left, right + def _maybe_convert_date(x): if not com.is_integer(x): conv_func = conv._dt_to_float_ordinal diff --git a/pandas/tools/tests/__init__.py b/pandas/tools/tests/__init__.py index 8b137891791fe..e69de29bb2d1d 100644 --- a/pandas/tools/tests/__init__.py +++ b/pandas/tools/tests/__init__.py @@ -1 +0,0 @@ - diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 91c278654e7ef..178fa5f19d8ca 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -34,6 +34,7 @@ def get_test_data(ngroups=NGROUPS, n=N): random.shuffle(arr) return arr + class TestMerge(unittest.TestCase): _multiprocess_can_split_ = True @@ -48,9 +49,9 @@ def setUp(self): # exclude a couple keys for fun self.df = self.df[self.df['key2'] > 1] - self.df2 = DataFrame({'key1' : get_test_data(n=N//5), - 'key2' : get_test_data(ngroups=NGROUPS//2, - n=N//5), + self.df2 = DataFrame({'key1': get_test_data(n=N // 5), + 'key2': get_test_data(ngroups=NGROUPS // 2, + n=N // 5), 'value': np.random.randn(N // 5)}) index, data = tm.getMixedTypeDict() @@ -61,9 +62,9 @@ def setUp(self): index=data['C']) self.left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], - 'v1': np.random.randn(7)}) + 'v1': np.random.randn(7)}) self.right = DataFrame({'v2': np.random.randn(4)}, - index=['d', 'b', 'c', 'a']) + index=['d', 'b', 'c', 'a']) def test_cython_left_outer_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) @@ -94,14 +95,14 @@ def test_cython_right_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - rs, ls = algos.left_outer_join(right, left, max_group) + rs, ls = algos.left_outer_join(right, left, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') # 0 1 1 1 exp_li = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5, - # 2 2 4 + # 2 2 4 6, 7, 8, 6, 7, 8, -1]) exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6]) @@ -278,7 +279,7 @@ def test_join_on_series_buglet(self): df = DataFrame({'a': [1, 1]}) ds = Series([2], index=[1], name='b') result = df.join(ds, on='a') - expected = DataFrame({'a' : [1, 1], + expected = DataFrame({'a': [1, 1], 'b': [2, 2]}, index=df.index) tm.assert_frame_equal(result, expected) @@ -327,22 +328,22 @@ def test_join_empty_bug(self): def test_join_unconsolidated(self): # GH #331 - a = DataFrame(randn(30,2), columns=['a','b']) + a = DataFrame(randn(30, 2), columns=['a', 'b']) c = Series(randn(30)) a['c'] = c - d = DataFrame(randn(30,1), 
columns=['q']) + d = DataFrame(randn(30, 1), columns=['q']) # it works! a.join(d) d.join(a) def test_join_multiindex(self): - index1 = MultiIndex.from_arrays([['a','a','a','b','b','b'], - [1,2,3,1,2,3]], + index1 = MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b'], + [1, 2, 3, 1, 2, 3]], names=['first', 'second']) - index2 = MultiIndex.from_arrays([['b','b','b','c','c','c'], - [1,2,3,1,2,3]], + index2 = MultiIndex.from_arrays([['b', 'b', 'b', 'c', 'c', 'c'], + [1, 2, 3, 1, 2, 3]], names=['first', 'second']) df1 = DataFrame(data=np.random.randn(6), index=index1, @@ -371,9 +372,9 @@ def test_join_multiindex(self): def test_join_inner_multiindex(self): key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux', - 'qux', 'snap'] + 'qux', 'snap'] key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two', - 'three', 'one'] + 'three', 'one'] data = np.random.randn(len(key1)) data = DataFrame({'key1': key1, 'key2': key2, @@ -410,9 +411,10 @@ def test_join_inner_multiindex(self): # _assert_same_contents(expected, expected2.ix[:, expected.columns]) def test_join_hierarchical_mixed(self): - df = DataFrame([(1,2,3), (4,5,6)], columns = ['a','b','c']) + df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=['a', 'b', 'c']) new_df = df.groupby(['a']).agg({'b': [np.mean, np.sum]}) - other_df = DataFrame([(1,2,3), (7,10,6)], columns = ['a','b','d']) + other_df = DataFrame( + [(1, 2, 3), (7, 10, 6)], columns=['a', 'b', 'd']) other_df.set_index('a', inplace=True) result = merge(new_df, other_df, left_index=True, right_index=True) @@ -420,8 +422,8 @@ def test_join_hierarchical_mixed(self): self.assertTrue('b' in result) def test_join_float64_float32(self): - a = DataFrame(randn(10,2), columns=['a','b']) - b = DataFrame(randn(10,1), columns=['c']).astype(np.float32) + a = DataFrame(randn(10, 2), columns=['a', 'b']) + b = DataFrame(randn(10, 1), columns=['c']).astype(np.float32) joined = a.join(b) expected = a.join(b.astype('f8')) assert_frame_equal(joined, expected) @@ -432,17 +434,17 @@ def test_join_float64_float32(self): a = np.random.randint(0, 5, 100) b = np.random.random(100).astype('Float64') c = np.random.random(100).astype('Float32') - df = DataFrame({'a': a, 'b' : b, 'c' : c}) - xpdf = DataFrame({'a': a, 'b' : b, 'c' : c.astype('Float64')}) + df = DataFrame({'a': a, 'b': b, 'c': c}) + xpdf = DataFrame({'a': a, 'b': b, 'c': c.astype('Float64')}) s = DataFrame(np.random.random(5).astype('f'), columns=['md']) rs = df.merge(s, left_on='a', right_index=True) xp = xpdf.merge(s.astype('f8'), left_on='a', right_index=True) assert_frame_equal(rs, xp) def test_join_many_non_unique_index(self): - df1 = DataFrame({"a": [1,1], "b": [1,1], "c": [10,20]}) - df2 = DataFrame({"a": [1,1], "b": [1,2], "d": [100,200]}) - df3 = DataFrame({"a": [1,1], "b": [1,2], "e": [1000,2000]}) + df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) + df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]}) + df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]}) idf1 = df1.set_index(["a", "b"]) idf2 = df2.set_index(["a", "b"]) idf3 = df3.set_index(["a", "b"]) @@ -459,9 +461,10 @@ def test_join_many_non_unique_index(self): assert_frame_equal(result, expected.ix[:, result.columns]) - df1 = DataFrame({"a": [1, 1, 1], "b": [1,1, 1], "c": [10,20, 30]}) - df2 = DataFrame({"a": [1, 1, 1], "b": [1,1, 2], "d": [100,200, 300]}) - df3 = DataFrame({"a": [1, 1, 1], "b": [1,1, 2], "e": [1000,2000, 3000]}) + df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) + df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 
2], "d": [100, 200, 300]}) + df3 = DataFrame( + {"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]}) idf1 = df1.set_index(["a", "b"]) idf2 = df2.set_index(["a", "b"]) idf3 = df3.set_index(["a", "b"]) @@ -478,7 +481,7 @@ def test_merge_index_singlekey_right_vs_left(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, - index=['d', 'b', 'c', 'a']) + index=['d', 'b', 'c', 'a']) merged1 = merge(left, right, left_on='key', right_index=True, how='left', sort=False) @@ -496,7 +499,7 @@ def test_merge_index_singlekey_inner(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, - index=['d', 'b', 'c', 'a']) + index=['d', 'b', 'c', 'a']) # inner join result = merge(left, right, left_on='key', right_index=True, @@ -532,7 +535,7 @@ def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 4]}) right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'], - 'value' : [5, 6, 7, 8]}) + 'value': [5, 6, 7, 8]}) merged = left.merge(right, left_on='lkey', right_on='rkey', how='outer', sort=True) @@ -545,8 +548,8 @@ def test_merge_different_column_key_names(self): assert_almost_equal(merged['value_y'], [6, np.nan, 5, 8, 5, 8, 7]) def test_merge_nocopy(self): - left = DataFrame({'a' : 0, 'b' : 1}, index=range(10)) - right = DataFrame({'c' : 'foo', 'd' : 'bar'}, index=range(10)) + left = DataFrame({'a': 0, 'b': 1}, index=range(10)) + right = DataFrame({'c': 'foo', 'd': 'bar'}, index=range(10)) merged = merge(left, right, left_index=True, right_index=True, copy=False) @@ -558,15 +561,15 @@ def test_merge_nocopy(self): self.assert_((right['d'] == 'peekaboo').all()) def test_join_sort(self): - left = DataFrame({'key' : ['foo', 'bar', 'baz', 'foo'], - 'value' : [1, 2, 3, 4]}) - right = DataFrame({'value2' : ['a', 'b', 'c']}, + left = DataFrame({'key': ['foo', 'bar', 'baz', 'foo'], + 'value': [1, 2, 3, 4]}) + right = DataFrame({'value2': ['a', 'b', 'c']}, index=['bar', 'baz', 'foo']) joined = left.join(right, on='key', sort=True) - expected = DataFrame({'key' : ['bar', 'baz', 'foo', 'foo'], - 'value' : [2, 3, 1, 4], - 'value2' : ['a', 'b', 'c', 'c']}, + expected = DataFrame({'key': ['bar', 'baz', 'foo', 'foo'], + 'value': [2, 3, 1, 4], + 'value2': ['a', 'b', 'c', 'c']}, index=[1, 2, 0, 3]) assert_frame_equal(joined, expected) @@ -577,25 +580,25 @@ def test_join_sort(self): def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame - left = DataFrame({'key' : [1, 1, 2, 2, 3], - 'value' : range(5)}, columns=['value', 'key']) - right = DataFrame({'key' : [1, 1, 2, 3, 4, 5], - 'rvalue' : range(6)}) + left = DataFrame({'key': [1, 1, 2, 2, 3], + 'value': range(5)}, columns=['value', 'key']) + right = DataFrame({'key': [1, 1, 2, 3, 4, 5], + 'rvalue': range(6)}) joined = merge(left, right, on='key', how='outer') - expected = DataFrame({'key' : [1, 1, 1, 1, 2, 2, 3, 4, 5.], - 'value' : np.array([0, 0, 1, 1, 2, 3, 4, - np.nan, np.nan]), - 'rvalue' : np.array([0, 1, 0, 1, 2, 2, 3, 4, 5])}, + expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5.], + 'value': np.array([0, 0, 1, 1, 2, 3, 4, + np.nan, np.nan]), + 'rvalue': np.array([0, 1, 0, 1, 2, 2, 3, 4, 5])}, columns=['value', 'key', 'rvalue']) assert_frame_equal(joined, expected) self.assert_(joined._data.is_consolidated()) def test_handle_join_key_pass_array(self): - left = 
DataFrame({'key' : [1, 1, 2, 2, 3], - 'value' : range(5)}, columns=['value', 'key']) - right = DataFrame({'rvalue' : range(6)}) + left = DataFrame({'key': [1, 1, 2, 2, 3], + 'value': range(5)}, columns=['value', 'key']) + right = DataFrame({'rvalue': range(6)}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on='key', right_on=key, how='outer') @@ -605,8 +608,8 @@ def test_handle_join_key_pass_array(self): self.assert_(merged['key'].notnull().all()) self.assert_(merged2['key'].notnull().all()) - left = DataFrame({'value' : range(5)}, columns=['value']) - right = DataFrame({'rvalue' : range(6)}) + left = DataFrame({'value': range(5)}, columns=['value']) + right = DataFrame({'rvalue': range(6)}) lkey = np.array([1, 1, 2, 2, 3]) rkey = np.array([1, 1, 2, 3, 4, 5]) @@ -615,7 +618,7 @@ def test_handle_join_key_pass_array(self): np.array([1, 1, 1, 1, 2, 2, 3, 4, 5]))) left = DataFrame({'value': range(3)}) - right = DataFrame({'rvalue' : range(6)}) + right = DataFrame({'rvalue': range(6)}) key = np.array([0, 1, 1, 2, 2, 3]) merged = merge(left, right, left_index=True, right_on=key, how='outer') @@ -669,7 +672,7 @@ def test_merge_non_unique_index_many_to_many(self): dt3 = datetime(2012, 5, 3) df1 = DataFrame({'x': ['a', 'b', 'c', 'd']}, index=[dt2, dt2, dt, dt]) - df2 = DataFrame({'y': ['e', 'f', 'g',' h', 'i']}, + df2 = DataFrame({'y': ['e', 'f', 'g', ' h', 'i']}, index=[dt2, dt2, dt3, dt, dt]) _check_merge(df1, df2) @@ -688,18 +691,21 @@ def test_merge_nosort(self): from datetime import datetime - d = {"var1" : np.random.randint(0, 10, size=10), - "var2" : np.random.randint(0, 10, size=10), - "var3" : [datetime(2012, 1, 12), datetime(2011, 2, 4), - datetime(2010, 2, 3), datetime(2012, 1, 12), - datetime(2011, 2, 4), datetime(2012, 4, 3), - datetime(2012, 3, 4), datetime(2008, 5, 1), - datetime(2010, 2, 3), datetime(2012, 2, 3)]} + d = {"var1": np.random.randint(0, 10, size=10), + "var2": np.random.randint(0, 10, size=10), + "var3": [datetime(2012, 1, 12), datetime(2011, 2, 4), + datetime( + 2010, 2, 3), datetime(2012, 1, 12), + datetime( + 2011, 2, 4), datetime(2012, 4, 3), + datetime( + 2012, 3, 4), datetime(2008, 5, 1), + datetime(2010, 2, 3), datetime(2012, 2, 3)]} df = DataFrame.from_dict(d) var3 = df.var3.unique() var3.sort() - new = DataFrame.from_dict({"var3" : var3, - "var8" : np.random.random(7)}) + new = DataFrame.from_dict({"var3": var3, + "var8": np.random.random(7)}) result = df.merge(new, on="var3", sort=False) exp = merge(df, new, on='var3', sort=False) @@ -707,6 +713,7 @@ def test_merge_nosort(self): self.assert_((df.var3.unique() == result.var3.unique()).all()) + def _check_merge(x, y): for how in ['inner', 'left', 'outer']: result = x.join(y, how=how) @@ -717,6 +724,7 @@ def _check_merge(x, y): assert_frame_equal(result, expected) + class TestMergeMulti(unittest.TestCase): def setUp(self): @@ -735,8 +743,8 @@ def setUp(self): 'three', 'one'] data = np.random.randn(len(key1)) - self.data = DataFrame({'key1' : key1, 'key2' : key2, - 'data' : data}) + self.data = DataFrame({'key1': key1, 'key2': key2, + 'data': data}) def test_merge_on_multikey(self): joined = self.data.join(self.to_join, on=['key1', 'key2']) @@ -767,23 +775,23 @@ def test_compress_group_combinations(self): key1 = np.tile(key1, 2) key2 = key1[::-1] - df = DataFrame({'key1' : key1, 'key2' : key2, - 'value1' : np.random.randn(20000)}) + df = DataFrame({'key1': key1, 'key2': key2, + 'value1': np.random.randn(20000)}) - df2 = DataFrame({'key1' : key1[::2], 'key2' : key2[::2], - 'value2' : 
np.random.randn(10000)}) + df2 = DataFrame({'key1': key1[::2], 'key2': key2[::2], + 'value2': np.random.randn(10000)}) # just to hit the label compression code path merged = merge(df, df2, how='outer') def test_left_join_index_preserve_order(self): - left = DataFrame({'k1' : [0, 1, 2] * 8, - 'k2' : ['foo', 'bar'] * 12, - 'v' : np.arange(24)}) + left = DataFrame({'k1': [0, 1, 2] * 8, + 'k2': ['foo', 'bar'] * 12, + 'v': np.arange(24)}) index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) - right = DataFrame({'v2' : [5, 7]}, index=index) + right = DataFrame({'v2': [5, 7]}, index=index) result = left.join(right, on=['k1', 'k2']) @@ -801,11 +809,11 @@ def test_left_join_index_preserve_order(self): def test_left_merge_na_buglet(self): left = DataFrame({'id': list('abcde'), 'v1': randn(5), - 'v2': randn(5), 'dummy' : list('abcde'), - 'v3' : randn(5)}, + 'v2': randn(5), 'dummy': list('abcde'), + 'v3': randn(5)}, columns=['id', 'v1', 'v2', 'dummy', 'v3']) - right = DataFrame({'id' : ['a', 'b', np.nan, np.nan, np.nan], - 'sv3' : [1.234, 5.678, np.nan, np.nan, np.nan]}) + right = DataFrame({'id': ['a', 'b', np.nan, np.nan, np.nan], + 'sv3': [1.234, 5.678, np.nan, np.nan, np.nan]}) merged = merge(left, right, on='id', how='left') @@ -894,8 +902,9 @@ def _restrict_to_columns(group, columns, suffix): return group + def _assert_same_contents(join_chunk, source): - NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... + NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values svalues = source.fillna(NA_SENTINEL).drop_duplicates().values @@ -904,6 +913,7 @@ def _assert_same_contents(join_chunk, source): assert(len(rows) == len(source)) assert(all(tuple(row) in rows for row in svalues)) + def _assert_all_na(join_chunk, source_columns, join_col): for c in source_columns: if c in join_col: @@ -923,6 +933,7 @@ def _join_by_hand(a, b, how='left'): a_re[col] = s return a_re.reindex(columns=result_columns) + class TestConcatenate(unittest.TestCase): _multiprocess_can_split_ = True @@ -958,8 +969,9 @@ def test_append(self): mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:]) # all equal except 'foo' column - assert_frame_equal(mixed_appended.reindex(columns=['A', 'B', 'C', 'D']), - mixed_appended2.reindex(columns=['A', 'B', 'C', 'D'])) + assert_frame_equal( + mixed_appended.reindex(columns=['A', 'B', 'C', 'D']), + mixed_appended2.reindex(columns=['A', 'B', 'C', 'D'])) # append empty empty = DataFrame({}) @@ -985,12 +997,12 @@ def test_append_length0_frame(self): assert_frame_equal(df5, expected) def test_append_records(self): - arr1 = np.zeros((2,),dtype=('i4,f4,a10')) - arr1[:] = [(1,2.,'Hello'),(2,3.,"World")] + arr1 = np.zeros((2,), dtype=('i4,f4,a10')) + arr1[:] = [(1, 2., 'Hello'), (2, 3., "World")] - arr2 = np.zeros((3,),dtype=('i4,f4,a10')) - arr2[:] = [(3, 4.,'foo'), - (5, 6.,"bar"), + arr2 = np.zeros((3,), dtype=('i4,f4,a10')) + arr2[:] = [(3, 4., 'foo'), + (5, 6., "bar"), (7., 8., 'baz')] df1 = DataFrame(arr1) @@ -1001,10 +1013,10 @@ def test_append_records(self): assert_frame_equal(result, expected) def test_append_different_columns(self): - df = DataFrame({'bools' : np.random.randn(10) > 0, - 'ints' : np.random.randint(0, 10, 10), - 'floats' : np.random.randn(10), - 'strings' : ['foo', 'bar'] * 5}) + df = DataFrame({'bools': np.random.randn(10) > 0, + 'ints': np.random.randint(0, 10, 10), + 'floats': np.random.randn(10), + 'strings': ['foo', 'bar'] * 5}) a = df[:5].ix[:, ['bools', 'ints', 'floats']] b = 
df[5:].ix[:, ['strings', 'ints', 'floats']] @@ -1028,10 +1040,10 @@ def test_append_many(self): def test_append_preserve_index_name(self): # #980 - df1 = DataFrame(data=None, columns=['A','B','C']) + df1 = DataFrame(data=None, columns=['A', 'B', 'C']) df1 = df1.set_index(['A']) - df2 = DataFrame(data=[[1,4,7], [2,5,8], [3,6,9]], - columns=['A','B','C']) + df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], + columns=['A', 'B', 'C']) df2 = df2.set_index(['A']) result = df1.append(df2) @@ -1052,7 +1064,6 @@ def _check_diff_index(df_list, result, exp_index): expected = reindexed[0].join(reindexed[1:]) tm.assert_frame_equal(result, expected) - # different join types joined = df_list[0].join(df_list[1:], how='outer') _check_diff_index(df_list, joined, df.index) @@ -1066,7 +1077,7 @@ def _check_diff_index(df_list, result, exp_index): self.assertRaises(ValueError, df_list[0].join, df_list[1:], on='a') def test_join_many_mixed(self): - df = DataFrame(np.random.randn(8, 4), columns=['A','B','C','D']) + df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']) df['key'] = ['foo', 'bar'] * 4 df1 = df.ix[:, ['A', 'B']] df2 = df.ix[:, ['C', 'D']] @@ -1076,9 +1087,9 @@ def test_join_many_mixed(self): assert_frame_equal(result, df) def test_append_missing_column_proper_upcast(self): - df1 = DataFrame({'A' : np.array([1,2, 3, 4], dtype='i8')}) - df2 = DataFrame({'B' : np.array([True,False, True, False], - dtype=bool)}) + df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')}) + df2 = DataFrame({'B': np.array([True, False, True, False], + dtype=bool)}) appended = df1.append(df2, ignore_index=True) self.assert_(appended['A'].dtype == 'f8') @@ -1132,10 +1143,10 @@ def test_concat_keys_specific_levels(self): self.assertEqual(result.columns.names[0], 'group_key') def test_concat_dataframe_keys_bug(self): - t1 = DataFrame({'value': Series([1,2,3], + t1 = DataFrame({'value': Series([1, 2, 3], index=Index(['a', 'b', 'c'], name='id'))}) t2 = DataFrame({'value': Series([7, 8], - index=Index(['a', 'b'], name = 'id'))}) + index=Index(['a', 'b'], name='id'))}) # it works result = concat([t1, t2], axis=1, keys=['t1', 't2']) @@ -1143,10 +1154,10 @@ def test_concat_dataframe_keys_bug(self): ('t2', 'value')]) def test_concat_dict(self): - frames = {'foo' : DataFrame(np.random.randn(4, 3)), - 'bar' : DataFrame(np.random.randn(4, 3)), - 'baz' : DataFrame(np.random.randn(4, 3)), - 'qux' : DataFrame(np.random.randn(4, 3))} + frames = {'foo': DataFrame(np.random.randn(4, 3)), + 'bar': DataFrame(np.random.randn(4, 3)), + 'baz': DataFrame(np.random.randn(4, 3)), + 'qux': DataFrame(np.random.randn(4, 3))} sorted_keys = sorted(frames) @@ -1166,7 +1177,7 @@ def test_concat_dict(self): def test_concat_ignore_index(self): frame1 = DataFrame({"test1": ["a", "b", "c"], - "test2": [1,2,3], + "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}) frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]}) frame1.index = Index(["x", "y", "z"]) @@ -1175,7 +1186,7 @@ def test_concat_ignore_index(self): v1 = concat([frame1, frame2], axis=1, ignore_index=True) nan = np.nan - expected = DataFrame([[nan,nan,nan, 4.3], + expected = DataFrame([[nan, nan, nan, 4.3], ['a', 1, 4.5, 5.2], ['b', 2, 3.2, 2.2], ['c', 3, 1.2, nan]], @@ -1246,10 +1257,10 @@ def test_concat_keys_levels_no_overlap(self): keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) def test_concat_rename_index(self): - a = DataFrame(np.random.rand(3,3), + a = DataFrame(np.random.rand(3, 3), columns=list('ABC'), index=Index(list('abc'), name='index_a')) - b = 
DataFrame(np.random.rand(3,3), + b = DataFrame(np.random.rand(3, 3), columns=list('ABC'), index=Index(list('abc'), name='index_b')) @@ -1264,16 +1275,16 @@ def test_concat_rename_index(self): def test_crossed_dtypes_weird_corner(self): columns = ['A', 'B', 'C', 'D'] - df1 = DataFrame({'A' : np.array([1, 2, 3, 4], dtype='f8'), - 'B' : np.array([1, 2, 3, 4], dtype='i8'), - 'C' : np.array([1, 2, 3, 4], dtype='f8'), - 'D' : np.array([1, 2, 3, 4], dtype='i8')}, + df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='f8'), + 'B': np.array([1, 2, 3, 4], dtype='i8'), + 'C': np.array([1, 2, 3, 4], dtype='f8'), + 'D': np.array([1, 2, 3, 4], dtype='i8')}, columns=columns) - df2 = DataFrame({'A' : np.array([1, 2, 3, 4], dtype='i8'), - 'B' : np.array([1, 2, 3, 4], dtype='f8'), - 'C' : np.array([1, 2, 3, 4], dtype='i8'), - 'D' : np.array([1, 2, 3, 4], dtype='f8')}, + df2 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8'), + 'B': np.array([1, 2, 3, 4], dtype='f8'), + 'C': np.array([1, 2, 3, 4], dtype='i8'), + 'D': np.array([1, 2, 3, 4], dtype='f8')}, columns=columns) appended = df1.append(df2, ignore_index=True) @@ -1283,7 +1294,8 @@ def test_crossed_dtypes_weird_corner(self): df = DataFrame(np.random.randn(1, 3), index=['a']) df2 = DataFrame(np.random.randn(1, 4), index=['b']) - result = concat([df, df2], keys=['one', 'two'], names=['first', 'second']) + result = concat( + [df, df2], keys=['one', 'two'], names=['first', 'second']) self.assertEqual(result.index.names, ['first', 'second']) def test_handle_empty_objects(self): @@ -1411,25 +1423,26 @@ def test_panel_concat_buglet(self): # #2257 def make_panel(): index = 5 - cols = 3 + cols = 3 + def df(): - return DataFrame(np.random.randn(index,cols), - index = [ "I%s" % i for i in range(index) ], - columns = [ "C%s" % i for i in range(cols) ]) - return Panel(dict([("Item%s" % x, df()) for x in ['A','B','C']])) + return DataFrame(np.random.randn(index, cols), + index=["I%s" % i for i in range(index)], + columns=["C%s" % i for i in range(cols)]) + return Panel(dict([("Item%s" % x, df()) for x in ['A', 'B', 'C']])) panel1 = make_panel() panel2 = make_panel() - panel2 = panel2.rename_axis(dict([ (x,"%s_1" % x) - for x in panel2.major_axis ]), + panel2 = panel2.rename_axis(dict([(x, "%s_1" % x) + for x in panel2.major_axis]), axis=1) panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) # it works! - concat([ panel1, panel3 ], axis = 1, verify_integrity = True) + concat([panel1, panel3], axis=1, verify_integrity=True) def test_panel4d_concat(self): p4d = tm.makePanel4D() @@ -1561,11 +1574,12 @@ def test_concat_bug_1719(self): ## to join with union ## these two are of different length! 
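
The assertion below leans on `concat(..., join='outer', axis=1)` aligning on the union of the two indexes, so the result's length cannot depend on argument order. A minimal sketch of that invariant, illustrative only and not part of the patch; `ts1` and `ts2` are hypothetical series of different lengths:

    import numpy as np
    from pandas import Series, concat

    ts1 = Series(np.arange(5.), index=range(5))
    ts2 = Series(np.arange(3.), index=range(2, 5))  # shorter, offset index

    left = concat([ts1, ts2], join='outer', axis=1)
    right = concat([ts2, ts1], join='outer', axis=1)
    assert len(left) == len(right)  # both carry the union of the indexes
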
- left = concat([ts1,ts2], join='outer', axis = 1) - right = concat([ts2,ts1], join='outer', axis = 1) + left = concat([ts1, ts2], join='outer', axis=1) + right = concat([ts2, ts1], join='outer', axis=1) self.assertEqual(len(left), len(right)) + class TestOrderedMerge(unittest.TestCase): def setUp(self): @@ -1586,7 +1600,8 @@ def test_basic(self): assert_frame_equal(result, expected) def test_ffill(self): - result = ordered_merge(self.left, self.right, on='key', fill_method='ffill') + result = ordered_merge( + self.left, self.right, on='key', fill_method='ffill') expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], 'lvalue': [1., 1, 2, 2, 3, 3.], 'rvalue': [nan, 1, 2, 3, 3, 4]}) @@ -1617,5 +1632,5 @@ def test_multigroup(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 133f6757f0a8d..c5cf483fb1450 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -7,27 +7,28 @@ from pandas.tools.pivot import pivot_table, crosstab import pandas.util.testing as tm + class TestPivotTable(unittest.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) def test_pivot_table(self): rows = ['A', 'B'] - cols= 'C' + cols = 'C' table = pivot_table(self.data, values='D', rows=rows, cols=cols) table2 = self.data.pivot_table(values='D', rows=rows, cols=cols) @@ -63,7 +64,7 @@ def test_pass_function(self): def test_pivot_table_multiple(self): rows = ['A', 'B'] - cols= 'C' + cols = 'C' table = pivot_table(self.data, rows=rows, cols=cols) expected = self.data.groupby(rows + [cols]).agg(np.mean).unstack() tm.assert_frame_equal(table, expected) @@ -136,7 +137,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): # no rows rtable = self.data.pivot_table(cols=['AA', 'BB'], margins=True, - aggfunc=np.mean) + aggfunc=np.mean) self.assert_(isinstance(rtable, Series)) for item in ['DD', 'EE', 'FF']: gmarg = table[item]['All', ''] @@ -150,12 +151,12 @@ def test_pivot_integer_columns(self): d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], - [d + datetime.timedelta(i) for i in xrange(20)], [1.0])) + [d + datetime.timedelta(i) for i in xrange(20)], [1.0])) df = pandas.DataFrame(data) - table = df.pivot_table(values=4, rows=[0,1,3],cols=[2]) + table = df.pivot_table(values=4, rows=[0, 1, 3], cols=[2]) df2 = df.rename(columns=str) - table2 = df2.pivot_table(values='4', rows=['0','1','3'], cols=['2']) + table2 = df2.pivot_table(values='4', rows=['0', '1', '3'], cols=['2']) tm.assert_frame_equal(table, table2) @@ -221,21 
+222,22 @@ def test_pivot_columns_lexsorted(self): self.assert_(pivoted.columns.is_monotonic) + class TestCrosstab(unittest.TestCase): def setUp(self): - df = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B' : ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C' : ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D' : np.random.randn(11), - 'E' : np.random.randn(11), - 'F' : np.random.randn(11)}) + df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', + 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', + 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', + 'dull', 'shiny', 'shiny', 'dull', + 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) self.df = df.append(df, ignore_index=True) @@ -250,7 +252,8 @@ def test_crosstab_multiple(self): result = crosstab(df['A'], [df['B'], df['C']]) expected = df.groupby(['A', 'B', 'C']).size() - expected = expected.unstack('B').unstack('C').fillna(0).astype(np.int64) + expected = expected.unstack( + 'B').unstack('C').fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected) result = crosstab([df['B'], df['C']], df['A']) @@ -314,7 +317,7 @@ def test_crosstab_pass_values(self): table = crosstab([a, b], c, values, aggfunc=np.sum, rownames=['foo', 'bar'], colnames=['baz']) - df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values' : values}) + df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values}) expected = df.pivot_table('values', rows=['foo', 'bar'], cols='baz', aggfunc=np.sum) @@ -322,5 +325,5 @@ def test_crosstab_pass_values(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index c571f2739f434..f886d545bb395 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -14,6 +14,7 @@ from numpy.testing import assert_equal, assert_almost_equal + class TestCut(unittest.TestCase): def test_simple(self): @@ -26,7 +27,7 @@ def test_bins(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]) result, bins = cut(data, 3, retbins=True) assert_equal(result.labels, [0, 0, 0, 1, 2, 0]) - assert_almost_equal(bins, [ 0.1905, 3.36666667, 6.53333333, 9.7]) + assert_almost_equal(bins, [0.1905, 3.36666667, 6.53333333, 9.7]) def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) @@ -38,13 +39,13 @@ def test_noright(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=False, retbins=True) assert_equal(result.labels, [0, 0, 0, 2, 3, 0, 1]) - assert_almost_equal(bins, [ 0.2, 2.575, 4.95, 7.325, 9.7095]) + assert_almost_equal(bins, [0.2, 2.575, 4.95, 7.325, 9.7095]) def test_arraylike(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] result, bins = cut(data, 3, retbins=True) assert_equal(result.labels, [0, 0, 0, 1, 2, 0]) - assert_almost_equal(bins, [ 0.1905, 3.36666667, 6.53333333, 9.7]) + assert_almost_equal(bins, [0.1905, 3.36666667, 6.53333333, 9.7]) def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] @@ -185,7 +186,6 @@ def test_label_formatting(self): result = tmod._format_label(117.9998, precision=3) self.assertEquals(result, '118') - def 
test_qcut_binning_issues(self): # #1978, 1979 path = os.path.join(curpath(), 'cut_data.csv') @@ -210,13 +210,12 @@ def test_qcut_binning_issues(self): self.assertTrue(ep < en) self.assertTrue(ep <= sn) + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) - - diff --git a/pandas/tools/tests/test_tools.py b/pandas/tools/tests/test_tools.py index baaf78cabcd32..b57ff68c97e3d 100644 --- a/pandas/tools/tests/test_tools.py +++ b/pandas/tools/tests/test_tools.py @@ -1,21 +1,22 @@ -#import unittest +# import unittest from pandas import DataFrame from pandas.tools.describe import value_range import numpy as np + def test_value_range(): df = DataFrame(np.random.randn(5, 5)) - df.ix[0,2] = -5 - df.ix[2,0] = 5 + df.ix[0, 2] = -5 + df.ix[2, 0] = 5 res = value_range(df) - assert( res['Minimum'] == -5 ) - assert( res['Maximum'] == 5 ) + assert(res['Minimum'] == -5) + assert(res['Maximum'] == 5) - df.ix[0,1] = np.NaN + df.ix[0, 1] = np.NaN - assert( res['Minimum'] == -5 ) - assert( res['Maximum'] == 5 ) + assert(res['Minimum'] == -5) + assert(res['Maximum'] == 5) diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index c17a51a7cf6e5..96b4db4520556 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -62,7 +62,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, >>> cut(np.ones(5), 4, labels=False) array([2, 2, 2, 2, 2]) """ - #NOTE: this binning code is changed a bit from histogram for var(x) == 0 + # NOTE: this binning code is changed a bit from histogram for var(x) == 0 if not np.iterable(bins): if np.isscalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") @@ -190,6 +190,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, return fac, bins + def _format_levels(bins, prec, right=True, include_lowest=False): fmt = lambda v: _format_label(v, precision=prec) @@ -209,7 +210,7 @@ def _format_levels(bins, prec, right=True, levels[0] = '[' + levels[0][1:] else: levels = ['[%s, %s)' % (fmt(a), fmt(b)) - for a, b in zip(bins, bins[1:])] + for a, b in zip(bins, bins[1:])] return levels diff --git a/pandas/tools/util.py b/pandas/tools/util.py index f10bfe1ea15df..d4c7190b0d782 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,5 +1,6 @@ from pandas.core.index import Index + def match(needles, haystack): haystack = Index(haystack) needles = Index(needles) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 42f66b97ee6bf..dc0df89d1ef9c 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -28,6 +28,7 @@ def register(): units.registry[pydt.date] = DatetimeConverter() units.registry[pydt.time] = TimeConverter() + def _to_ordinalf(tm): tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + float(tm.microsecond / 1e6)) @@ -51,7 +52,7 @@ class TimeConverter(units.ConversionInterface): def convert(value, unit, axis): valid_types = (str, pydt.time) if (isinstance(value, valid_types) or com.is_integer(value) or - com.is_float(value)): + com.is_float(value)): return time2num(value) if isinstance(value, Index): return value.map(time2num) @@ -106,7 +107,7 @@ def convert(values, units, axis): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (str, datetime, Period, pydt.date, pydt.time) if (isinstance(values, valid_types) or 
com.is_integer(values) or - com.is_float(values)): + com.is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) @@ -207,12 +208,12 @@ def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): if self._tz is dates.UTC: self._tz._utcoffset = self._tz.utcoffset(None) self.scaled = { - 365.0: '%Y', - 30.: '%b %Y', - 1.0: '%b %d %Y', - 1. / 24.: '%H:%M:%S', - 1. / 24. / 3600. / 1000.: '%H:%M:%S.%f' - } + 365.0: '%Y', + 30.: '%b %Y', + 1.0: '%b %d %Y', + 1. / 24.: '%H:%M:%S', + 1. / 24. / 3600. / 1000.: '%H:%M:%S.%f' + } def _get_fmt(self, x): @@ -317,7 +318,7 @@ def __call__(self): raise RuntimeError(('MillisecondLocator estimated to generate %d ' 'ticks from %s to %s: exceeds Locator.MAXTICKS' '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) + (estimate, dmin, dmax, self.MAXTICKS * 2)) freq = '%dL' % self._get_interval() tz = self.tz.tzname(None) @@ -329,7 +330,7 @@ def __call__(self): if len(all_dates) > 0: locs = self.raise_if_exceeds(dates.date2num(all_dates)) return locs - except Exception, e: #pragma: no cover + except Exception, e: # pragma: no cover pass lims = dates.date2num([dmin, dmax]) @@ -497,7 +498,7 @@ def _daily_finder(vmin, vmax, freq): def first_label(label_flags): if (label_flags[0] == 0) and (label_flags.size > 1) and \ - ((vmin_orig % 1) > 0.0): + ((vmin_orig % 1) > 0.0): return label_flags[1] else: return label_flags[0] @@ -542,26 +543,43 @@ def _second_finder(label_interval): info['min'][second_start & (_second % label_interval == 0)] = True year_start = period_break(dates_, 'year') info_fmt = info['fmt'] - info_fmt[second_start & (_second % label_interval == 0)] = '%H:%M:%S' + info_fmt[second_start & (_second % + label_interval == 0)] = '%H:%M:%S' info_fmt[day_start] = '%H:%M:%S\n%d-%b' info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' - if span < periodsperday / 12000.0: _second_finder(1) - elif span < periodsperday / 6000.0: _second_finder(2) - elif span < periodsperday / 2400.0: _second_finder(5) - elif span < periodsperday / 1200.0: _second_finder(10) - elif span < periodsperday / 800.0: _second_finder(15) - elif span < periodsperday / 400.0: _second_finder(30) - elif span < periodsperday / 150.0: _minute_finder(1) - elif span < periodsperday / 70.0: _minute_finder(2) - elif span < periodsperday / 24.0: _minute_finder(5) - elif span < periodsperday / 12.0: _minute_finder(15) - elif span < periodsperday / 6.0: _minute_finder(30) - elif span < periodsperday / 2.5: _hour_finder(1, False) - elif span < periodsperday / 1.5: _hour_finder(2, False) - elif span < periodsperday * 1.25: _hour_finder(3, False) - elif span < periodsperday * 2.5: _hour_finder(6, True) - elif span < periodsperday * 4: _hour_finder(12, True) + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < 
periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) else: info_maj[month_start] = True info_min[day_start] = True @@ -887,6 +905,8 @@ def autoscale(self): #####------------------------------------------------------------------------- #---- --- Formatter --- #####------------------------------------------------------------------------- + + class TimeSeries_DateFormatter(Formatter): """ Formats the ticks along an axis controlled by a :class:`PeriodIndex`. diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 299e93ddfe74d..3bf29af8581a9 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -23,6 +23,7 @@ class FreqGroup(object): FR_MIN = 8000 FR_SEC = 9000 + class Resolution(object): RESO_US = 0 @@ -33,15 +34,17 @@ class Resolution(object): @classmethod def get_str(cls, reso): - return {RESO_US : 'microsecond', - RESO_SEC : 'second', - RESO_MIN : 'minute', - RESO_HR : 'hour', - RESO_DAY : 'day'}.get(reso, 'day') + return {RESO_US: 'microsecond', + RESO_SEC: 'second', + RESO_MIN: 'minute', + RESO_HR: 'hour', + RESO_DAY: 'day'}.get(reso, 'day') + def get_reso_string(reso): return Resolution.get_str(reso) + def get_to_timestamp_base(base): if base <= FreqGroup.FR_WK: return FreqGroup.FR_DAY @@ -78,11 +81,11 @@ def get_freq_code(freqstr): if isinstance(freqstr, tuple): if (com.is_integer(freqstr[0]) and - com.is_integer(freqstr[1])): - #e.g., freqstr = (2000, 1) + com.is_integer(freqstr[1])): + # e.g., freqstr = (2000, 1) return freqstr else: - #e.g., freqstr = ('T', 5) + # e.g., freqstr = ('T', 5) try: code = _period_str_to_code(freqstr[0]) stride = freqstr[1] diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 1ff675bbed83a..891d7a1b52d07 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -8,8 +8,9 @@ from pandas.core.common import isnull from pandas.core.index import Index, Int64Index -from pandas.tseries.frequencies import (infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string) +from pandas.tseries.frequencies import ( + infer_freq, to_offset, get_period_alias, + Resolution, get_reso_string) from pandas.tseries.offsets import DateOffset, generate_range, Tick from pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly @@ -256,7 +257,7 @@ def __new__(cls, data=None, tz = tools._maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None): + getattr(data, 'tz', None) is None): # Convert tz-naive to UTC ints = subarr.view('i8') subarr = tslib.tz_localize_to_utc(ints, tz) @@ -337,7 +338,7 @@ def _generate(cls, start, end, periods, name, offset, if (offset._should_cache() and not (offset._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)): + _naive_in_cache_range(start, end)): index = cls._cached_range(start, end, periods=periods, offset=offset, name=name) else: @@ -362,7 +363,7 @@ def _generate(cls, start, end, periods, name, offset, if (offset._should_cache() and not (offset._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)): + _naive_in_cache_range(start, end)): index = cls._cached_range(start, end, periods=periods, offset=offset, name=name) else: @@ -909,7 +910,7 @@ def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None if (isinstance(other, DatetimeIndex) and self.offset == other.offset - 
and self._can_fast_union(other)): + and self._can_fast_union(other)): joined = self._view_like(joined) joined.name = name return joined @@ -1304,7 +1305,7 @@ def equals(self, other): return True if (not hasattr(other, 'inferred_type') or - other.inferred_type != 'datetime64'): + other.inferred_type != 'datetime64'): if self.offset is not None: return False try: @@ -1315,7 +1316,8 @@ def equals(self, other): if self.tz is not None: if other.tz is None: return False - same_zone = tslib.get_timezone(self.tz) == tslib.get_timezone(other.tz) + same_zone = tslib.get_timezone( + self.tz) == tslib.get_timezone(other.tz) else: if other.tz is not None: return False @@ -1645,6 +1647,7 @@ def _time_to_micros(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second return 1000000 * seconds + time.microsecond + def _process_concat_data(to_concat, name): klass = Index kwargs = {} @@ -1684,7 +1687,7 @@ def _process_concat_data(to_concat, name): to_concat = [x.values for x in to_concat] klass = DatetimeIndex - kwargs = {'tz' : tz} + kwargs = {'tz': tz} concat = com._concat_compat else: for i, x in enumerate(to_concat): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c340ac261592f..24594d1bea9d6 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -116,7 +116,7 @@ def __repr__(self): attrs = [] for attr in self.__dict__: if ((attr == 'kwds' and len(self.kwds) == 0) - or attr.startswith('_')): + or attr.startswith('_')): continue if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) @@ -415,7 +415,8 @@ def apply(self, other): n = self.n wkday, days_in_month = tslib.monthrange(other.year, other.month) - lastBDay = days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0) + lastBDay = days_in_month - max(((wkday + days_in_month - 1) + % 7) - 4, 0) if n > 0 and not other.day >= lastBDay: n = n - 1 @@ -630,7 +631,8 @@ def apply(self, other): n = self.n wkday, days_in_month = tslib.monthrange(other.year, other.month) - lastBDay = days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0) + lastBDay = days_in_month - max(((wkday + days_in_month - 1) + % 7) - 4, 0) monthsToGo = 3 - ((other.month - self.startingMonth) % 3) if monthsToGo == 3: @@ -824,11 +826,11 @@ def apply(self, other): years = n if n > 0: if (other.month < self.month or - (other.month == self.month and other.day < lastBDay)): + (other.month == self.month and other.day < lastBDay)): years -= 1 elif n <= 0: if (other.month > self.month or - (other.month == self.month and other.day > lastBDay)): + (other.month == self.month and other.day > lastBDay)): years += 1 other = other + relativedelta(years=years) @@ -872,11 +874,11 @@ def apply(self, other): if n > 0: # roll back first for positive n if (other.month < self.month or - (other.month == self.month and other.day < first)): + (other.month == self.month and other.day < first)): years -= 1 elif n <= 0: # roll forward if (other.month > self.month or - (other.month == self.month and other.day > first)): + (other.month == self.month and other.day > first)): years += 1 # set first bday for result @@ -928,7 +930,7 @@ def _decrement(date): def _rollf(date): if (date.month != self.month or - date.day < tslib.monthrange(date.year, date.month)[1]): + date.day < tslib.monthrange(date.year, date.month)[1]): date = _increment(date) return date @@ -1073,6 +1075,7 @@ def rule_code(self): def isAnchored(self): return False + def _delta_to_tick(delta): if delta.microseconds == 0: if delta.seconds == 0: @@ -1107,25 +1110,31 
@@ class Day(Tick, CacheableOffset): _inc = timedelta(1) _rule_base = 'D' + class Hour(Tick): _inc = timedelta(0, 3600) _rule_base = 'H' + class Minute(Tick): _inc = timedelta(0, 60) _rule_base = 'T' + class Second(Tick): _inc = timedelta(0, 1) _rule_base = 'S' + class Milli(Tick): _rule_base = 'L' + class Micro(Tick): _inc = timedelta(microseconds=1) _rule_base = 'U' + class Nano(Tick): _inc = 1 _rule_base = 'N' diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 5be006ea6e200..75decb91485ca 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -429,6 +429,8 @@ def dt64arr_to_periodarr(data, freq, tz): return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) # --- Period index sketch + + def _period_index_cmp(opname): """ Wrap comparison operations to convert datetime-like to datetime64 diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 5fe01161c996c..fff1c33e70e1f 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -25,6 +25,7 @@ #---------------------------------------------------------------------- # Plotting functions and monkey patches + def tsplot(series, plotf, **kwargs): """ Plots a Series on the given Matplotlib axes or the current axes @@ -99,7 +100,7 @@ def _maybe_resample(series, ax, freq, plotf, kwargs): elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): _upsample_others(ax, freq, plotf, kwargs) ax_freq = freq - else: #pragma: no cover + else: # pragma: no cover raise ValueError('Incompatible frequency conversion') return freq, ax_freq, series @@ -140,12 +141,13 @@ def _upsample_others(ax, freq, plotf, kwargs): labels.extend(rlabels) if (legend is not None and kwargs.get('legend', True) and - len(lines) > 0): + len(lines) > 0): title = legend.get_title().get_text() if title == 'None': title = None ax.legend(lines, labels, loc='best', title=title) + def _replot_ax(ax, freq, plotf, kwargs): data = getattr(ax, '_plot_data', None) ax._plot_data = [] diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 39cbb26f4f255..4cc70692ee85f 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -44,7 +44,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W']) rule = self.freq.rule_code if (rule in end_types or - ('-' in rule and rule[:rule.find('-')] in end_types)): + ('-' in rule and rule[:rule.find('-')] in end_types)): if closed is None: closed = 'right' if label is None: @@ -133,7 +133,7 @@ def _get_time_bins(self, axis): # a little hack trimmed = False if (len(binner) > 2 and binner[-2] == axis[-1] and - self.closed == 'right'): + self.closed == 'right'): binner = binner[:-1] trimmed = True @@ -224,7 +224,7 @@ def _resample_timestamps(self, obj): if isinstance(loffset, (DateOffset, timedelta)): if (isinstance(result.index, DatetimeIndex) - and len(result.index) > 0): + and len(result.index) > 0): result.index = result.index + loffset diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index dc5eb1cd6cc27..dc5d5cf67995b 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -12,8 +12,10 @@ except ImportError: raise nose.SkipTest + def test_timetonum_accepts_unicode(): - assert(converter.time2num("00:01")==converter.time2num(u"00:01")) + assert(converter.time2num("00:01") == converter.time2num(u"00:01")) + class TestDateTimeConverter(unittest.TestCase): @@ -22,9 +24,9 @@ def
setUp(self): self.tc = converter.TimeFormatter(None) def test_convert_accepts_unicode(self): - r1 = self.dtc.convert("12:22",None,None) - r2 = self.dtc.convert(u"12:22",None,None) - assert(r1==r2), "DatetimeConverter.convert should accept unicode" + r1 = self.dtc.convert("12:22", None, None) + r2 = self.dtc.convert(u"12:22", None, None) + assert(r1 == r2), "DatetimeConverter.convert should accept unicode" def test_conversion(self): rs = self.dtc.convert(['2012-1-1'], None, None)[0] @@ -49,5 +51,5 @@ def test_time_formatter(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 625eadbc140c2..1a844cdb4f77c 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -14,12 +14,14 @@ import pandas.core.datetools as datetools + def eq_gen_range(kwargs, expected): rng = generate_range(**kwargs) assert(np.array_equal(list(rng), expected)) START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + class TestGenRangeGeneration(unittest.TestCase): def test_generate(self): rng1 = list(generate_range(START, END, offset=datetools.bday)) @@ -38,10 +40,11 @@ def test_2(self): datetime(2008, 1, 3)]) def test_3(self): - eq_gen_range(dict(start = datetime(2008, 1, 5), - end = datetime(2008, 1, 6)), + eq_gen_range(dict(start=datetime(2008, 1, 5), + end=datetime(2008, 1, 6)), []) + class TestDateRange(unittest.TestCase): def setUp(self): @@ -235,8 +238,8 @@ def test_intersection(self): def test_intersection_bug(self): # GH #771 - a = bdate_range('11/30/2011','12/31/2011') - b = bdate_range('12/10/2011','12/20/2011') + a = bdate_range('11/30/2011', '12/31/2011') + b = bdate_range('12/10/2011', '12/20/2011') result = a.intersection(b) self.assert_(result.equals(b)) @@ -296,9 +299,7 @@ def test_range_bug(self): self.assert_(np.array_equal(result, DatetimeIndex(exp_values))) - - if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 3a96d59d3dd1c..aad831ae48a64 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -16,6 +16,7 @@ import pandas.lib as lib + def test_to_offset_multiple(): freqstr = '2h30min' freqstr2 = '2h 30min' @@ -53,12 +54,13 @@ def test_to_offset_multiple(): else: assert(False) + def test_to_offset_negative(): freqstr = '-1S' result = to_offset(freqstr) assert(result.n == -1) - freqstr='-5min10s' + freqstr = '-5min10s' result = to_offset(freqstr) assert(result.n == -310) @@ -75,6 +77,7 @@ def test_anchored_shortcuts(): _dti = DatetimeIndex + class TestFrequencyInference(unittest.TestCase): def test_raise_if_too_few(self): @@ -197,7 +200,6 @@ def _check_generated_range(self, start, freq): (inf_freq == 'Q-OCT' and gen.freqstr in ('Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN'))) - gen = date_range(start, periods=5, freq=freq) index = _dti(gen.values) if not freq.startswith('Q-'): @@ -243,6 +245,7 @@ def test_non_datetimeindex(self): MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] + def test_is_superperiod_subperiod(): assert(fmod.is_superperiod(offsets.YearEnd(), offsets.MonthEnd())) 
assert(fmod.is_subperiod(offsets.MonthEnd(), offsets.YearEnd())) @@ -252,5 +255,5 @@ def test_is_superperiod_subperiod(): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 7a368a26115fc..b95a0bb2eff2c 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -23,25 +23,30 @@ _multiprocess_can_split_ = True + def test_monthrange(): import calendar - for y in range(2000,2013): - for m in range(1,13): - assert monthrange(y,m) == calendar.monthrange(y,m) + for y in range(2000, 2013): + for m in range(1, 13): + assert monthrange(y, m) == calendar.monthrange(y, m) #### ## Misc function tests #### + + def test_format(): actual = format(datetime(2008, 1, 15)) assert actual == '20080115' + def test_ole2datetime(): actual = ole2datetime(60000) assert actual == datetime(2064, 4, 8) assert_raises(Exception, ole2datetime, 60) + def test_to_datetime1(): actual = to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15) @@ -53,17 +58,19 @@ def test_to_datetime1(): s = 'Month 1, 1999' assert to_datetime(s) == s + def test_normalize_date(): actual = normalize_date(datetime(2007, 10, 1, 1, 12, 5, 10)) assert actual == datetime(2007, 10, 1) + def test_to_m8(): valb = datetime(2007, 10, 1) valu = _to_m8(valb) assert type(valu) == np.datetime64 - #assert valu == np.datetime64(datetime(2007,10,1)) + # assert valu == np.datetime64(datetime(2007,10,1)) -#def test_datetime64_box(): +# def test_datetime64_box(): # valu = np.datetime64(datetime(2007,10,1)) # valb = _dt_box(valu) # assert type(valb) == datetime @@ -73,8 +80,10 @@ def test_to_m8(): ### DateOffset Tests ##### + class TestDateOffset(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.d = Timestamp(datetime(2008, 1, 2)) @@ -111,8 +120,10 @@ def test_eq(self): self.assert_(offset1 != offset2) self.assert_(not (offset1 == offset2)) + class TestBusinessDay(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): self.d = datetime(2008, 1, 1) @@ -158,30 +169,31 @@ def testSub(self): self.assertRaises(Exception, off.__sub__, self.d) self.assertEqual(2 * off - off, off) - self.assertEqual(self.d - self.offset2, self.d + BDay(-2)) + self.assertEqual(self.d - self.offset2, self.d + BDay(-2)) def testRSub(self): self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) def testMult1(self): - self.assertEqual(self.d + 10*self.offset, self.d + BDay(10)) + self.assertEqual(self.d + 10 * self.offset, self.d + BDay(10)) def testMult2(self): - self.assertEqual(self.d + (-5*BDay(-10)), + self.assertEqual(self.d + (-5 * BDay(-10)), self.d + BDay(50)) - def testRollback1(self): self.assertEqual(BDay(10).rollback(self.d), self.d) def testRollback2(self): - self.assertEqual(BDay(10).rollback(datetime(2008, 1, 5)), datetime(2008, 1, 4)) + self.assertEqual( + BDay(10).rollback(datetime(2008, 1, 5)), datetime(2008, 1, 4)) def testRollforward1(self): self.assertEqual(BDay(10).rollforward(self.d), self.d) def testRollforward2(self): - self.assertEqual(BDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7)) + self.assertEqual( + BDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7)) def test_roll_date_object(self): offset = BDay() @@ -218,7 +230,7 @@ def test_apply(self): datetime(2008, 1, 6): datetime(2008, 1, 7), 
datetime(2008, 1, 7): datetime(2008, 1, 8)})) - tests.append((2*bday, + tests.append((2 * bday, {datetime(2008, 1, 1): datetime(2008, 1, 3), datetime(2008, 1, 4): datetime(2008, 1, 8), datetime(2008, 1, 5): datetime(2008, 1, 8), @@ -233,7 +245,7 @@ def test_apply(self): datetime(2008, 1, 7): datetime(2008, 1, 4), datetime(2008, 1, 8): datetime(2008, 1, 7)})) - tests.append((-2*bday, + tests.append((-2 * bday, {datetime(2008, 1, 1): datetime(2007, 12, 28), datetime(2008, 1, 4): datetime(2008, 1, 2), datetime(2008, 1, 5): datetime(2008, 1, 3), @@ -271,10 +283,12 @@ def test_offsets_compare_equal(self): offset2 = BDay() self.assertFalse(offset1 != offset2) + def assertOnOffset(offset, date, expected): actual = offset.onOffset(date) assert actual == expected + class TestWeek(unittest.TestCase): def test_corner(self): self.assertRaises(Exception, Week, weekday=7) @@ -289,28 +303,28 @@ def test_isAnchored(self): def test_offset(self): tests = [] - tests.append((Week(), # not business week + tests.append((Week(), # not business week {datetime(2008, 1, 1): datetime(2008, 1, 8), datetime(2008, 1, 4): datetime(2008, 1, 11), datetime(2008, 1, 5): datetime(2008, 1, 12), datetime(2008, 1, 6): datetime(2008, 1, 13), datetime(2008, 1, 7): datetime(2008, 1, 14)})) - tests.append((Week(weekday=0), # Mon + tests.append((Week(weekday=0), # Mon {datetime(2007, 12, 31): datetime(2008, 1, 7), datetime(2008, 1, 4): datetime(2008, 1, 7), datetime(2008, 1, 5): datetime(2008, 1, 7), datetime(2008, 1, 6): datetime(2008, 1, 7), datetime(2008, 1, 7): datetime(2008, 1, 14)})) - tests.append((Week(0, weekday=0), # n=0 -> roll forward. Mon + tests.append((Week(0, weekday=0), # n=0 -> roll forward. Mon {datetime(2007, 12, 31): datetime(2007, 12, 31), datetime(2008, 1, 4): datetime(2008, 1, 7), datetime(2008, 1, 5): datetime(2008, 1, 7), datetime(2008, 1, 6): datetime(2008, 1, 7), datetime(2008, 1, 7): datetime(2008, 1, 7)})) - tests.append((Week(-2, weekday=1), # n=0 -> roll forward. Mon + tests.append((Week(-2, weekday=1), # n=-2 -> rolls back. Tue
{datetime(2010, 4, 6): datetime(2010, 3, 23), datetime(2010, 4, 8): datetime(2010, 3, 30), datetime(2010, 4, 5): datetime(2010, 3, 23)})) @@ -338,6 +352,7 @@ def test_offsets_compare_equal(self): offset2 = Week() self.assertFalse(offset1 != offset2) + class TestWeekOfMonth(unittest.TestCase): def test_constructor(self): @@ -348,10 +363,10 @@ def test_constructor(self): self.assertRaises(Exception, WeekOfMonth, n=1, week=0, weekday=7) def test_offset(self): - date1 = datetime(2011, 1, 4) # 1st Tuesday of Month - date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month - date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month - date4 = datetime(2011, 1, 25) # 4th Tuesday of Month + date1 = datetime(2011, 1, 4) # 1st Tuesday of Month + date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month + date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month + date4 = datetime(2011, 1, 25) # 4th Tuesday of Month # see for loop for structure test_cases = [ @@ -413,6 +428,7 @@ def test_onOffset(self): offset = WeekOfMonth(week=week, weekday=weekday) self.assert_(offset.onOffset(date) == expected) + class TestBMonthBegin(unittest.TestCase): def test_offset(self): tests = [] @@ -534,13 +550,14 @@ def test_offsets_compare_equal(self): offset2 = BMonthEnd() self.assertFalse(offset1 != offset2) + class TestMonthBegin(unittest.TestCase): def test_offset(self): tests = [] - #NOTE: I'm not entirely happy with the logic here for Begin -ss - #see thread 'offset conventions' on the ML + # NOTE: I'm not entirely happy with the logic here for Begin -ss + # see thread 'offset conventions' on the ML tests.append((MonthBegin(), {datetime(2008, 1, 31): datetime(2008, 2, 1), datetime(2008, 2, 1): datetime(2008, 3, 1), @@ -573,6 +590,7 @@ def test_offset(self): for base, expected in cases.iteritems(): assertEq(offset, base, expected) + class TestMonthEnd(unittest.TestCase): def test_offset(self): @@ -639,6 +657,7 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestBQuarterBegin(unittest.TestCase): def test_isAnchored(self): @@ -664,7 +683,7 @@ def test_offset(self): datetime(2007, 7, 1): datetime(2007, 7, 2), datetime(2007, 4, 1): datetime(2007, 4, 2), datetime(2007, 4, 2): datetime(2007, 7, 2), - datetime(2008, 4, 30): datetime(2008, 7, 1),})) + datetime(2008, 4, 30): datetime(2008, 7, 1), })) tests.append((BQuarterBegin(startingMonth=2), {datetime(2008, 1, 1): datetime(2008, 2, 1), @@ -677,7 +696,7 @@ def test_offset(self): datetime(2008, 8, 15): datetime(2008, 11, 3), datetime(2008, 9, 15): datetime(2008, 11, 3), datetime(2008, 11, 1): datetime(2008, 11, 3), - datetime(2008, 4, 30): datetime(2008, 5, 1),})) + datetime(2008, 4, 30): datetime(2008, 5, 1), })) tests.append((BQuarterBegin(startingMonth=1, n=0), {datetime(2008, 1, 1): datetime(2008, 1, 1), @@ -691,7 +710,7 @@ def test_offset(self): datetime(2007, 4, 2): datetime(2007, 4, 2), datetime(2007, 7, 1): datetime(2007, 7, 2), datetime(2007, 4, 15): datetime(2007, 7, 2), - datetime(2007, 7, 2): datetime(2007, 7, 2),})) + datetime(2007, 7, 2): datetime(2007, 7, 2), })) tests.append((BQuarterBegin(startingMonth=1, n=-1), {datetime(2008, 1, 1): datetime(2007, 10, 1), @@ -704,7 +723,7 @@ def test_offset(self): datetime(2007, 7, 3): datetime(2007, 7, 2), datetime(2007, 4, 3): datetime(2007, 4, 2), datetime(2007, 7, 2): datetime(2007, 4, 2), - datetime(2008, 4, 1): datetime(2008, 1, 1),})) + datetime(2008, 4, 1): datetime(2008, 1, 1), })) tests.append((BQuarterBegin(startingMonth=1, n=2), {datetime(2008, 1, 1):
datetime(2008, 7, 1), @@ -713,7 +732,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 7, 1), datetime(2007, 3, 31): datetime(2007, 7, 2), datetime(2007, 4, 15): datetime(2007, 10, 1), - datetime(2008, 4, 30): datetime(2008, 10, 1),})) + datetime(2008, 4, 30): datetime(2008, 10, 1), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -723,6 +742,7 @@ def test_offset(self): offset = BQuarterBegin(n=-1, startingMonth=1) self.assertEqual(datetime(2007, 4, 3) + offset, datetime(2007, 4, 2)) + class TestBQuarterEnd(unittest.TestCase): def test_isAnchored(self): @@ -741,7 +761,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 30), datetime(2008, 3, 31): datetime(2008, 4, 30), datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31),})) + datetime(2008, 4, 30): datetime(2008, 7, 31), })) tests.append((BQuarterEnd(startingMonth=2), {datetime(2008, 1, 1): datetime(2008, 2, 29), @@ -751,7 +771,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 5, 30), datetime(2008, 3, 31): datetime(2008, 5, 30), datetime(2008, 4, 15): datetime(2008, 5, 30), - datetime(2008, 4, 30): datetime(2008, 5, 30),})) + datetime(2008, 4, 30): datetime(2008, 5, 30), })) tests.append((BQuarterEnd(startingMonth=1, n=0), {datetime(2008, 1, 1): datetime(2008, 1, 31), @@ -761,7 +781,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 30), datetime(2008, 3, 31): datetime(2008, 4, 30), datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30),})) + datetime(2008, 4, 30): datetime(2008, 4, 30), })) tests.append((BQuarterEnd(startingMonth=1, n=-1), {datetime(2008, 1, 1): datetime(2007, 10, 31), @@ -771,7 +791,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 1, 31), datetime(2008, 3, 31): datetime(2008, 1, 31), datetime(2008, 4, 15): datetime(2008, 1, 31), - datetime(2008, 4, 30): datetime(2008, 1, 31),})) + datetime(2008, 4, 30): datetime(2008, 1, 31), })) tests.append((BQuarterEnd(startingMonth=1, n=2), {datetime(2008, 1, 31): datetime(2008, 7, 31), @@ -780,7 +800,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 7, 31), datetime(2008, 3, 31): datetime(2008, 7, 31), datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31),})) + datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -792,40 +812,40 @@ def test_offset(self): def test_onOffset(self): - tests = [(BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), - - (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), 
False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), - - (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False), - ] + tests = [ + (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False), + ] for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestQuarterBegin(unittest.TestCase): def test_isAnchored(self): self.assert_(QuarterBegin(startingMonth=1).isAnchored()) @@ -843,7 +863,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 1), datetime(2008, 3, 31): datetime(2008, 4, 1), datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2008, 4, 1): datetime(2008, 7, 1),})) + datetime(2008, 4, 1): datetime(2008, 7, 1), })) tests.append((QuarterBegin(startingMonth=2), {datetime(2008, 1, 1): datetime(2008, 2, 1), @@ -853,7 +873,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 5, 1), datetime(2008, 3, 31): datetime(2008, 5, 1), datetime(2008, 4, 15): datetime(2008, 5, 1), - datetime(2008, 4, 30): datetime(2008, 5, 1),})) + datetime(2008, 4, 30): datetime(2008, 5, 1), })) tests.append((QuarterBegin(startingMonth=1, n=0), {datetime(2008, 
1, 1): datetime(2008, 1, 1), @@ -864,7 +884,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 1), datetime(2008, 3, 31): datetime(2008, 4, 1), datetime(2008, 4, 15): datetime(2008, 4, 1), - datetime(2008, 4, 30): datetime(2008, 4, 1),})) + datetime(2008, 4, 30): datetime(2008, 4, 1), })) tests.append((QuarterBegin(startingMonth=1, n=-1), {datetime(2008, 1, 1): datetime(2007, 10, 1), @@ -884,7 +904,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 7, 1), datetime(2008, 3, 31): datetime(2008, 7, 1), datetime(2008, 4, 15): datetime(2008, 10, 1), - datetime(2008, 4, 1): datetime(2008, 10, 1),})) + datetime(2008, 4, 1): datetime(2008, 10, 1), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -894,6 +914,7 @@ def test_offset(self): offset = QuarterBegin(n=-1, startingMonth=1) self.assertEqual(datetime(2010, 2, 1) + offset, datetime(2010, 1, 1)) + class TestQuarterEnd(unittest.TestCase): def test_isAnchored(self): @@ -912,7 +933,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 30), datetime(2008, 3, 31): datetime(2008, 4, 30), datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31),})) + datetime(2008, 4, 30): datetime(2008, 7, 31), })) tests.append((QuarterEnd(startingMonth=2), {datetime(2008, 1, 1): datetime(2008, 2, 29), @@ -922,7 +943,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 5, 31), datetime(2008, 3, 31): datetime(2008, 5, 31), datetime(2008, 4, 15): datetime(2008, 5, 31), - datetime(2008, 4, 30): datetime(2008, 5, 31),})) + datetime(2008, 4, 30): datetime(2008, 5, 31), })) tests.append((QuarterEnd(startingMonth=1, n=0), {datetime(2008, 1, 1): datetime(2008, 1, 31), @@ -932,7 +953,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 4, 30), datetime(2008, 3, 31): datetime(2008, 4, 30), datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30),})) + datetime(2008, 4, 30): datetime(2008, 4, 30), })) tests.append((QuarterEnd(startingMonth=1, n=-1), {datetime(2008, 1, 1): datetime(2007, 10, 31), @@ -952,7 +973,7 @@ def test_offset(self): datetime(2008, 3, 15): datetime(2008, 7, 31), datetime(2008, 3, 31): datetime(2008, 7, 31), datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31),})) + datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -965,42 +986,67 @@ def test_offset(self): def test_onOffset(self): tests = [(QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), - (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2007, 12, 31), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2008, 2, 29), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2007, 3, 30), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2007, 3, 31), False), (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), - - (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), - 
(QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2008, 5, 30), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2008, 5, 31), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2007, 6, 29), False), + (QuarterEnd( + 1, startingMonth=1), datetime(2007, 6, 30), False), + + (QuarterEnd( + 1, startingMonth=2), datetime(2008, 1, 31), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2007, 12, 31), False), (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2007, 3, 30), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2007, 3, 31), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2008, 4, 30), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2008, 5, 30), False), (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), - - (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2007, 6, 29), False), + (QuarterEnd( + 1, startingMonth=2), datetime(2007, 6, 30), False), + + (QuarterEnd( + 1, startingMonth=3), datetime(2008, 1, 31), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2007, 12, 31), True), + (QuarterEnd( + 1, startingMonth=3), datetime(2008, 2, 29), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2007, 3, 30), False), (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2008, 4, 30), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2008, 5, 30), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2008, 5, 31), False), + (QuarterEnd( + 1, startingMonth=3), datetime(2007, 6, 29), False), (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True), - ] + ] for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestBYearBegin(unittest.TestCase): def test_misspecified(self): @@ -1011,22 +1057,22 @@ def test_offset(self): tests = [] tests.append((BYearBegin(), - {datetime(2008, 1, 1): datetime(2009, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2011, 1, 1) : datetime(2011, 1, 3), - datetime(2011, 1, 3) : datetime(2012, 1, 2), - datetime(2005, 12, 30) : datetime(2006, 1, 2), - datetime(2005, 12, 31) : datetime(2006, 1, 2) - } - )) + {datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2011, 1, 1): datetime(2011, 1, 3), + datetime(2011, 1, 3): datetime(2012, 1, 2), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2) + } + )) 
tests.append((BYearBegin(0), {datetime(2008, 1, 1): datetime(2008, 1, 1), datetime(2008, 6, 30): datetime(2009, 1, 1), datetime(2008, 12, 31): datetime(2009, 1, 1), datetime(2005, 12, 30): datetime(2006, 1, 2), - datetime(2005, 12, 31): datetime(2006, 1, 2),})) + datetime(2005, 12, 31): datetime(2006, 1, 2), })) tests.append((BYearBegin(-1), {datetime(2007, 1, 1): datetime(2006, 1, 2), @@ -1036,12 +1082,12 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2008, 1, 1), datetime(2006, 12, 29): datetime(2006, 1, 2), datetime(2006, 12, 30): datetime(2006, 1, 2), - datetime(2006, 1, 1): datetime(2005, 1, 3),})) + datetime(2006, 1, 1): datetime(2005, 1, 3), })) tests.append((BYearBegin(-2), {datetime(2007, 1, 1): datetime(2005, 1, 3), datetime(2007, 6, 30): datetime(2006, 1, 2), - datetime(2008, 12, 31): datetime(2007, 1, 1),})) + datetime(2008, 12, 31): datetime(2007, 1, 1), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -1061,15 +1107,14 @@ def test_offset(self): datetime(2008, 6, 30): datetime(2009, 1, 1), datetime(2008, 12, 31): datetime(2009, 1, 1), datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1),})) + datetime(2005, 12, 31): datetime(2006, 1, 1), })) tests.append((YearBegin(0), {datetime(2008, 1, 1): datetime(2008, 1, 1), datetime(2008, 6, 30): datetime(2009, 1, 1), datetime(2008, 12, 31): datetime(2009, 1, 1), datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1),})) - + datetime(2005, 12, 31): datetime(2006, 1, 1), })) tests.append((YearBegin(-1), {datetime(2007, 1, 1): datetime(2006, 1, 1), @@ -1077,18 +1122,17 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2008, 1, 1), datetime(2006, 12, 29): datetime(2006, 1, 1), datetime(2006, 12, 30): datetime(2006, 1, 1), - datetime(2007, 1, 1): datetime(2006, 1, 1),})) + datetime(2007, 1, 1): datetime(2006, 1, 1), })) tests.append((YearBegin(-2), {datetime(2007, 1, 1): datetime(2005, 1, 1), datetime(2008, 6, 30): datetime(2007, 1, 1), - datetime(2008, 12, 31): datetime(2007, 1, 1),})) + datetime(2008, 12, 31): datetime(2007, 1, 1), })) for offset, cases in tests: for base, expected in cases.iteritems(): assertEq(offset, base, expected) - def test_onOffset(self): tests = [ @@ -1101,6 +1145,7 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestBYearEndLagged(unittest.TestCase): def test_bad_month_fail(self): @@ -1141,6 +1186,7 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestBYearEnd(unittest.TestCase): def test_offset(self): @@ -1151,13 +1197,13 @@ def test_offset(self): datetime(2008, 6, 30): datetime(2008, 12, 31), datetime(2008, 12, 31): datetime(2009, 12, 31), datetime(2005, 12, 30): datetime(2006, 12, 29), - datetime(2005, 12, 31): datetime(2006, 12, 29),})) + datetime(2005, 12, 31): datetime(2006, 12, 29), })) tests.append((BYearEnd(0), {datetime(2008, 1, 1): datetime(2008, 12, 31), datetime(2008, 6, 30): datetime(2008, 12, 31), datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 29),})) + datetime(2005, 12, 31): datetime(2006, 12, 29), })) tests.append((BYearEnd(-1), {datetime(2007, 1, 1): datetime(2006, 12, 29), @@ -1165,12 +1211,12 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2007, 12, 31), datetime(2006, 12, 29): datetime(2005, 12, 30), datetime(2006, 12, 30): datetime(2006, 12, 29), - datetime(2007, 1, 1): datetime(2006, 
12, 29),})) + datetime(2007, 1, 1): datetime(2006, 12, 29), })) tests.append((BYearEnd(-2), {datetime(2007, 1, 1): datetime(2005, 12, 30), datetime(2008, 6, 30): datetime(2006, 12, 29), - datetime(2008, 12, 31): datetime(2006, 12, 29),})) + datetime(2008, 12, 31): datetime(2006, 12, 29), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -1188,6 +1234,7 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestYearEnd(unittest.TestCase): def test_misspecified(self): @@ -1201,13 +1248,13 @@ def test_offset(self): datetime(2008, 6, 30): datetime(2008, 12, 31), datetime(2008, 12, 31): datetime(2009, 12, 31), datetime(2005, 12, 30): datetime(2005, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 31),})) + datetime(2005, 12, 31): datetime(2006, 12, 31), })) tests.append((YearEnd(0), {datetime(2008, 1, 1): datetime(2008, 12, 31), datetime(2008, 6, 30): datetime(2008, 12, 31), datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 30): datetime(2005, 12, 31),})) + datetime(2005, 12, 30): datetime(2005, 12, 31), })) tests.append((YearEnd(-1), {datetime(2007, 1, 1): datetime(2006, 12, 31), @@ -1215,12 +1262,12 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2007, 12, 31), datetime(2006, 12, 29): datetime(2005, 12, 31), datetime(2006, 12, 30): datetime(2005, 12, 31), - datetime(2007, 1, 1): datetime(2006, 12, 31),})) + datetime(2007, 1, 1): datetime(2006, 12, 31), })) tests.append((YearEnd(-2), {datetime(2007, 1, 1): datetime(2005, 12, 31), datetime(2008, 6, 30): datetime(2006, 12, 31), - datetime(2008, 12, 31): datetime(2006, 12, 31),})) + datetime(2008, 12, 31): datetime(2006, 12, 31), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -1238,6 +1285,7 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + class TestYearEndDiffMonth(unittest.TestCase): def test_offset(self): @@ -1255,7 +1303,7 @@ def test_offset(self): {datetime(2008, 1, 1): datetime(2008, 3, 31), datetime(2008, 2, 28): datetime(2008, 3, 31), datetime(2008, 3, 31): datetime(2008, 3, 31), - datetime(2005, 3, 30): datetime(2005, 3, 31),})) + datetime(2005, 3, 30): datetime(2005, 3, 31), })) tests.append((YearEnd(-1, month=3), {datetime(2007, 1, 1): datetime(2006, 3, 31), @@ -1263,12 +1311,12 @@ def test_offset(self): datetime(2008, 3, 31): datetime(2007, 3, 31), datetime(2006, 3, 29): datetime(2005, 3, 31), datetime(2006, 3, 30): datetime(2005, 3, 31), - datetime(2007, 3, 1): datetime(2006, 3, 31),})) + datetime(2007, 3, 1): datetime(2006, 3, 31), })) tests.append((YearEnd(-2, month=3), {datetime(2007, 1, 1): datetime(2005, 3, 31), datetime(2008, 6, 30): datetime(2007, 3, 31), - datetime(2008, 3, 31): datetime(2006, 3, 31),})) + datetime(2008, 3, 31): datetime(2006, 3, 31), })) for offset, cases in tests: for base, expected in cases.iteritems(): @@ -1286,14 +1334,16 @@ def test_onOffset(self): for offset, date, expected in tests: assertOnOffset(offset, date, expected) + def assertEq(offset, base, expected): actual = offset + base try: assert actual == expected except AssertionError: raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)" - "\nAt Date: %s"% - (expected, actual, offset, base)) + "\nAt Date: %s" % + (expected, actual, offset, base)) + def test_Hour(): assertEq(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) @@ -1308,6 +1358,7 @@ def test_Hour(): assert not Hour().isAnchored() + def test_Minute(): assertEq(Minute(), 
datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) assertEq(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) @@ -1320,17 +1371,20 @@ def test_Minute(): assert not Minute().isAnchored() + def test_Second(): assertEq(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) assertEq(Second(-1), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1)) assertEq(2 * Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 2)) - assertEq(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1)) + assertEq( + -1 * Second(), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1)) assert (Second(3) + Second(2)) == Second(5) assert (Second(3) - Second(2)) == Second() assert not Second().isAnchored() + def test_tick_offset(): assert not Day().isAnchored() assert not Milli().isAnchored() @@ -1353,17 +1407,19 @@ def test_compare_ticks(): assert(kls(3) == kls(3)) assert(kls(3) != kls(4)) + def test_hasOffsetName(): assert hasOffsetName(BDay()) assert not hasOffsetName(BDay(2)) + def test_get_offset_name(): assert_raises(Exception, get_offset_name, BDay(2)) assert get_offset_name(BDay()) == 'B' assert get_offset_name(BMonthEnd()) == 'BM' assert get_offset_name(Week(weekday=0)) == 'W-MON' - assert get_offset_name(Week(weekday=1)) =='W-TUE' + assert get_offset_name(Week(weekday=1)) == 'W-TUE' assert get_offset_name(Week(weekday=2)) == 'W-WED' assert get_offset_name(Week(weekday=3)) == 'W-THU' assert get_offset_name(Week(weekday=4)) == 'W-FRI' @@ -1383,6 +1439,7 @@ def test_get_offset(): assert get_offset('W-FRI') == Week(weekday=4) assert get_offset('w@Sat') == Week(weekday=5) + def test_parse_time_string(): (date, parsed, reso) = parse_time_string('4Q1984') (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') @@ -1390,6 +1447,7 @@ def test_parse_time_string(): assert parsed == parsed_lower assert reso == reso_lower + def test_get_standard_freq(): fstr = get_standard_freq('W') assert fstr == get_standard_freq('w') @@ -1402,6 +1460,7 @@ def test_get_standard_freq(): assert fstr == get_standard_freq('5QuarTer') assert fstr == get_standard_freq(('q', 5)) + def test_quarterly_dont_normalize(): date = datetime(2012, 3, 31, 5, 30) @@ -1447,17 +1506,20 @@ def test_rule_code(self): assert alias == _offset_map[alias].rule_code assert alias == (_offset_map[alias] * 5).rule_code + def test_apply_ticks(): result = offsets.Hour(3).apply(offsets.Hour(4)) exp = offsets.Hour(7) assert(result == exp) + def test_delta_to_tick(): delta = timedelta(3) tick = offsets._delta_to_tick(delta) assert(tick == offsets.Day(3)) + def test_dateoffset_misc(): oset = offsets.DateOffset(months=2, days=4) # it works @@ -1465,6 +1527,7 @@ def test_dateoffset_misc(): assert(not offsets.DateOffset(months=2) == 2) + def test_freq_offsets(): off = BDay(1, offset=timedelta(0, 1800)) assert(off.freqstr == 'B+30Min') @@ -1474,5 +1537,5 @@ def test_freq_offsets(): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index e3b51033a098c..22264a5613922 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -28,6 +28,7 @@ from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm + class TestPeriodProperties(TestCase): "Test properties such as year, month, weekday, etc...." 
# @@ -305,7 +306,6 @@ def _ex(*args): xp = _ex(2012, 1, 2) self.assertEquals(Period('2012', freq='W').end_time, xp) - def test_properties_annually(self): # Test properties on Periods with annually frequency. a_date = Period(freq='A', year=2007) @@ -322,7 +322,6 @@ def test_properties_quarterly(self): assert_equal((qd + x).qyear, 2007) assert_equal((qd + x).quarter, x + 1) - def test_properties_monthly(self): # Test properties on Periods with daily frequency. m_date = Period(freq='M', year=2007, month=1) @@ -339,7 +338,6 @@ def test_properties_monthly(self): assert_equal(m_ival_x.quarter, 4) assert_equal(m_ival_x.month, x + 1) - def test_properties_weekly(self): # Test properties on Periods with daily frequency. w_date = Period(freq='WK', year=2007, month=1, day=7) @@ -350,7 +348,6 @@ def test_properties_weekly(self): assert_equal(w_date.week, 1) assert_equal((w_date - 1).week, 52) - def test_properties_daily(self): # Test properties on Periods with daily frequency. b_date = Period(freq='B', year=2007, month=1, day=1) @@ -371,7 +368,6 @@ def test_properties_daily(self): assert_equal(d_date.weekday, 0) assert_equal(d_date.dayofyear, 1) - def test_properties_hourly(self): # Test properties on Periods with hourly frequency. h_date = Period(freq='H', year=2007, month=1, day=1, hour=0) @@ -385,11 +381,10 @@ def test_properties_hourly(self): assert_equal(h_date.hour, 0) # - def test_properties_minutely(self): # Test properties on Periods with minutely frequency. t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) + minute=0) # assert_equal(t_date.quarter, 1) assert_equal(t_date.month, 1) @@ -399,11 +394,10 @@ def test_properties_minutely(self): assert_equal(t_date.hour, 0) assert_equal(t_date.minute, 0) - def test_properties_secondly(self): # Test properties on Periods with secondly frequency. 
s_date = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) # assert_equal(s_date.year, 2007) assert_equal(s_date.quarter, 1) @@ -460,9 +454,11 @@ def test_comparisons(self): self.assertEquals(p, p) self.assert_(not p == 1) + def noWrap(item): return item + class TestFreqConversion(TestCase): "Test frequency conversion of date objects" @@ -493,17 +489,17 @@ def test_conv_annual(self): ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, - hour=23) + hour=23) ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) + hour=23, minute=59) ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) @@ -542,7 +538,6 @@ def test_conv_annual(self): assert_equal(ival_A.asfreq('A'), ival_A) - def test_conv_quarterly(self): # frequency conversion tests: from Quarterly Frequency @@ -562,17 +557,17 @@ def test_conv_quarterly(self): ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, - hour=23) + hour=23) ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) + hour=23, minute=59) ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) @@ -620,17 +615,17 @@ def test_conv_monthly(self): ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, - hour=23) + hour=23) ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) + hour=23, minute=59) ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) assert_equal(ival_M.asfreq('A'), ival_M_to_A) assert_equal(ival_M_end_of_year.asfreq('A'), ival_M_to_A) @@ -652,7 +647,6 @@ def test_conv_monthly(self): assert_equal(ival_M.asfreq('M'), ival_M) - def test_conv_weekly(self): # frequency conversion tests: from Weekly 
Frequency @@ -695,10 +689,10 @@ def test_conv_weekly(self): if Period(freq='D', year=2007, month=3, day=31).weekday == 6: ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, - quarter=1) + quarter=1) else: ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, - quarter=2) + quarter=2) if Period(freq='D', year=2007, month=1, day=31).weekday == 6: ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) @@ -710,17 +704,17 @@ def test_conv_weekly(self): ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, - hour=23) + hour=23) ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) + hour=23, minute=59) ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) assert_equal(ival_W.asfreq('A'), ival_W_to_A) assert_equal(ival_W_end_of_year.asfreq('A'), @@ -762,7 +756,6 @@ def test_conv_weekly(self): assert_equal(ival_W.asfreq('WK'), ival_W) - def test_conv_business(self): # frequency conversion tests: from Business Frequency" @@ -778,17 +771,17 @@ def test_conv_business(self): ival_B_to_W = Period(freq='WK', year=2007, month=1, day=7) ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, - hour=23) + hour=23) ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) + hour=23, minute=59) ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) assert_equal(ival_B.asfreq('A'), ival_B_to_A) assert_equal(ival_B_end_of_year.asfreq('A'), ival_B_to_A) @@ -810,7 +803,6 @@ def test_conv_business(self): assert_equal(ival_B.asfreq('B'), ival_B) - def test_conv_daily(self): # frequency conversion tests: from Business Frequency" @@ -842,17 +834,17 @@ def test_conv_daily(self): ival_D_to_W = Period(freq='WK', year=2007, month=1, day=7) ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, - hour=23) + hour=23) ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) + hour=23, minute=59) ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) assert_equal(ival_D.asfreq('A'), ival_D_to_A) @@ -895,15 +887,15 @@ def test_conv_hourly(self): ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, hour=23) ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, 
day=31, - hour=23) + hour=23) ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, - hour=23) + hour=23) ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, hour=23) ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, - hour=23) + hour=23) ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, - hour=23) + hour=23) ival_H_to_A = Period(freq='A', year=2007) ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) @@ -913,13 +905,13 @@ def test_conv_hourly(self): ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=59) + hour=0, minute=59) ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=59, second=59) + hour=0, minute=59, second=59) assert_equal(ival_H.asfreq('A'), ival_H_to_A) assert_equal(ival_H_end_of_year.asfreq('A'), ival_H_to_A) @@ -949,15 +941,15 @@ def test_conv_minutely(self): ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, hour=23, minute=59) ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) + hour=23, minute=59) ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) + hour=23, minute=59) ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, hour=23, minute=59) ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) + hour=23, minute=59) ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) + hour=23, minute=59) ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, hour=0, minute=59) @@ -970,9 +962,9 @@ def test_conv_minutely(self): ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=0) + hour=0, minute=0, second=0) ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) + hour=0, minute=0, second=59) assert_equal(ival_T.asfreq('A'), ival_T_to_A) assert_equal(ival_T_end_of_year.asfreq('A'), ival_T_to_A) @@ -1002,19 +994,19 @@ def test_conv_secondly(self): ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, hour=23, minute=59, second=59) ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, hour=23, minute=59, second=59) ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) + hour=23, minute=59, second=59) ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=59, second=59) ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) + hour=0, minute=0, second=59) ival_S_to_A = Period(freq='A', year=2007) ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) @@ -1023,9 +1015,9 @@ def test_conv_secondly(self): ival_S_to_D = Period(freq='D', year=2007, 
month=1, day=1) ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, - hour=0) + hour=0) ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) + hour=0, minute=0) assert_equal(ival_S.asfreq('A'), ival_S_to_A) assert_equal(ival_S_end_of_year.asfreq('A'), ival_S_to_A) @@ -1046,6 +1038,7 @@ def test_conv_secondly(self): assert_equal(ival_S.asfreq('S'), ival_S) + class TestPeriodIndex(TestCase): def __init__(self, *args, **kwds): TestCase.__init__(self, *args, **kwds) @@ -1081,8 +1074,9 @@ def test_constructor_field_arrays(self): expected = period_range('1990Q3', '2009Q2', freq='Q-DEC') self.assert_(index.equals(expected)) - self.assertRaises(ValueError, PeriodIndex, year=years, quarter=quarters, - freq='2Q-DEC') + self.assertRaises( + ValueError, PeriodIndex, year=years, quarter=quarters, + freq='2Q-DEC') index = PeriodIndex(year=years, quarter=quarters) self.assert_(index.equals(expected)) @@ -1223,7 +1217,8 @@ def test_sub(self): self.assert_(result.equals(exp)) def test_periods_number_check(self): - self.assertRaises(ValueError, period_range, '2011-1-1', '2012-1-1', 'B') + self.assertRaises( + ValueError, period_range, '2011-1-1', '2012-1-1', 'B') def test_to_timestamp(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') @@ -1238,7 +1233,6 @@ def test_to_timestamp(self): result = series.to_timestamp(how='start') self.assert_(result.index.equals(exp_index)) - def _get_with_delta(delta, freq='A-DEC'): return date_range(to_datetime('1/1/2001') + delta, to_datetime('12/31/2009') + delta, freq=freq) @@ -1476,7 +1470,8 @@ def test_constructor(self): try: PeriodIndex(start=start) - raise AssertionError('Must specify periods if missing start or end') + raise AssertionError( + 'Must specify periods if missing start or end') except ValueError: pass @@ -1598,7 +1593,7 @@ def test_ts_repr(self): def test_frame_index_to_string(self): index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') - frame = DataFrame(np.random.randn(3,4), index=index) + frame = DataFrame(np.random.randn(3, 4), index=index) # it works! 
frame.to_string() @@ -1902,7 +1897,7 @@ def test_convert_array_of_periods(self): def test_with_multi_index(self): # #1705 - index = date_range('1/1/2012',periods=4,freq='12H') + index = date_range('1/1/2012', periods=4, freq='12H') index_as_arrays = [index.to_period(freq='D'), index.hour] s = Series([0, 1, 2, 3], index_as_arrays) @@ -1912,7 +1907,7 @@ def test_with_multi_index(self): self.assert_(isinstance(s.index.values[0][0], Period)) def test_to_datetime_1703(self): - index = period_range('1/1/2012',periods=4,freq='D') + index = period_range('1/1/2012', periods=4, freq='D') result = index.to_datetime() self.assertEquals(result[0], Timestamp('1/1/2012')) @@ -1929,10 +1924,11 @@ def test_append_concat(self): s2 = s2.to_period() # drops index - result = pd.concat([s1,s2]) + result = pd.concat([s1, s2]) self.assert_(isinstance(result.index, PeriodIndex)) self.assertEquals(result.index[0], s1.index[0]) + def _permute(obj): return obj.take(np.random.permutation(len(obj))) @@ -1987,7 +1983,7 @@ def _check_freq(self, freq, base_date): self.assert_(np.array_equal(rng.values, exp)) def test_negone_ordinals(self): - freqs = ['A', 'M', 'Q', 'D','H', 'T', 'S'] + freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] period = Period(ordinal=-1, freq='D') for freq in freqs: @@ -2006,5 +2002,5 @@ def test_negone_ordinals(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 4b6ae0b003a95..27fa00f146cb2 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -42,14 +42,14 @@ def setUp(self): self.period_ser = [Series(np.random.randn(len(x)), x) for x in idx] self.period_df = [DataFrame(np.random.randn(len(x), 3), index=x, columns=['A', 'B', 'C']) - for x in idx] + for x in idx] freq = ['S', 'T', 'H', 'D', 'W', 'M', 'Q-DEC', 'A', '1B30Min'] idx = [date_range('12/31/1999', freq=x, periods=100) for x in freq] self.datetime_ser = [Series(np.random.randn(len(x)), x) for x in idx] self.datetime_df = [DataFrame(np.random.randn(len(x), 3), index=x, - columns=['A', 'B', 'C']) - for x in idx] + columns=['A', 'B', 'C']) + for x in idx] @slow def test_frame_inferred(self): @@ -103,11 +103,11 @@ def test_both_style_and_color(self): plt.close('all') ts = tm.makeTimeSeries() - ts.plot(style='b-', color='#000099') #works + ts.plot(style='b-', color='#000099') # works plt.close('all') s = ts.reset_index(drop=True) - s.plot(style='b-', color='#000099') #non-tsplot + s.plot(style='b-', color='#000099') # non-tsplot @slow def test_high_freq(self): @@ -222,7 +222,7 @@ def test_irreg_hf(self): diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() sec = 1. 
/ 24 / 60 / 60 - self.assert_((np.fabs(diffs[1:] - [sec, sec*2, sec]) < 1e-8).all()) + self.assert_((np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all()) plt.clf() fig.add_subplot(111) @@ -236,7 +236,7 @@ def test_irreg_hf(self): def test_irregular_datetime64_repr_bug(self): import matplotlib.pyplot as plt ser = tm.makeTimeSeries() - ser = ser[[0,1,2,7]] + ser = ser[[0, 1, 2, 7]] fig = plt.gcf() plt.clf() @@ -313,11 +313,11 @@ def _test(ax): ax = ser.plot() _test(ax) - df = DataFrame({'a' : ser, 'b' : ser + 1}) + df = DataFrame({'a': ser, 'b': ser + 1}) ax = df.plot() _test(ax) - df = DataFrame({'a' : ser, 'b' : ser + 1}) + df = DataFrame({'a': ser, 'b': ser + 1}) axes = df.plot(subplots=True) [_test(ax) for ax in axes] @@ -388,7 +388,7 @@ def test_finder_monthly(self): def test_finder_monthly_long(self): import matplotlib.pyplot as plt plt.close('all') - rng = period_range('1988Q1', periods=24*12, freq='M') + rng = period_range('1988Q1', periods=24 * 12, freq='M') ser = Series(np.random.randn(len(rng)), rng) ax = ser.plot() xaxis = ax.get_xaxis() @@ -705,12 +705,12 @@ def test_from_weekly_resampling(self): @slow def test_irreg_dtypes(self): - #date + # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] df = DataFrame(np.random.randn(len(idx), 3), Index(idx, dtype=object)) _check_plot_works(df.plot) - #np.datetime64 + # np.datetime64 idx = date_range('1/1/2000', periods=10) idx = idx[[0, 2, 5, 9]].asobject df = DataFrame(np.random.randn(len(idx), 3), idx) @@ -724,8 +724,8 @@ def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) - df = DataFrame({'a' : np.random.randn(len(ts)), - 'b' : np.random.randn(len(ts))}, + df = DataFrame({'a': np.random.randn(len(ts)), + 'b': np.random.randn(len(ts))}, index=ts) ax = df.plot() @@ -763,8 +763,8 @@ def test_time_musec(self): deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) - df = DataFrame({'a' : np.random.randn(len(ts)), - 'b' : np.random.randn(len(ts))}, + df = DataFrame({'a': np.random.randn(len(ts)), + 'b': np.random.randn(len(ts))}, index=ts) ax = df.plot() @@ -802,7 +802,7 @@ def test_secondary_legend(self): plt.clf() ax = fig.add_subplot(211) - #ts + # ts df = tm.makeTimeDataFrame() ax = df.plot(secondary_y=['A', 'B']) leg = ax.get_legend() @@ -843,7 +843,7 @@ def test_secondary_legend(self): # TODO: color cycle problems self.assert_(len(colors) == 4) - #non-ts + # non-ts df = tm.makeDataFrame() plt.clf() ax = fig.add_subplot(211) @@ -915,6 +915,8 @@ def test_mpl_nopandas(self): line2.get_xydata()[:, 0]) PNG_PATH = 'tmp.png' + + def _check_plot_works(f, freq=None, series=None, *args, **kwargs): import matplotlib.pyplot as plt @@ -949,5 +951,5 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs): os.remove(PNG_PATH) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index af28b2e6b3256..cc45dc36d6537 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -22,6 +22,7 @@ bday = BDay() + def _skip_if_no_pytz(): try: import pytz @@ -31,18 +32,19 @@ def _skip_if_no_pytz(): class TestResample(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): - dti 
= DatetimeIndex(start=datetime(2005,1,1), - end=datetime(2005,1,10), freq='Min') + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') self.series = Series(np.random.rand(len(dti)), dti) def test_custom_grouper(self): - dti = DatetimeIndex(freq='Min', start=datetime(2005,1,1), - end=datetime(2005,1,10)) + dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10)) - data = np.array([1]*len(dti)) + data = np.array([1] * len(dti)) s = Series(data, index=dti) b = TimeGrouper(Minute(5)) @@ -60,7 +62,6 @@ def test_custom_grouper(self): for f in funcs: g._cython_agg_general(f) - self.assertEquals(g.ngroups, 2593) self.assert_(notnull(g.mean()).all()) @@ -105,8 +106,9 @@ def test_resample_basic(self): def test_resample_basic_from_daily(self): # from daily - dti = DatetimeIndex(start=datetime(2005,1,1), end=datetime(2005,1,10), - freq='D', name='index') + dti = DatetimeIndex( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), + freq='D', name='index') s = Series(np.random.rand(len(dti)), dti) @@ -114,45 +116,45 @@ def test_resample_basic_from_daily(self): result = s.resample('w-sun', how='last') self.assertEquals(len(result), 3) - self.assert_((result.index.dayofweek == [6,6,6]).all()) + self.assert_((result.index.dayofweek == [6, 6, 6]).all()) self.assertEquals(result.irow(0), s['1/2/2005']) self.assertEquals(result.irow(1), s['1/9/2005']) self.assertEquals(result.irow(2), s.irow(-1)) result = s.resample('W-MON', how='last') self.assertEquals(len(result), 2) - self.assert_((result.index.dayofweek == [0,0]).all()) + self.assert_((result.index.dayofweek == [0, 0]).all()) self.assertEquals(result.irow(0), s['1/3/2005']) self.assertEquals(result.irow(1), s['1/10/2005']) result = s.resample('W-TUE', how='last') self.assertEquals(len(result), 2) - self.assert_((result.index.dayofweek == [1,1]).all()) + self.assert_((result.index.dayofweek == [1, 1]).all()) self.assertEquals(result.irow(0), s['1/4/2005']) self.assertEquals(result.irow(1), s['1/10/2005']) result = s.resample('W-WED', how='last') self.assertEquals(len(result), 2) - self.assert_((result.index.dayofweek == [2,2]).all()) + self.assert_((result.index.dayofweek == [2, 2]).all()) self.assertEquals(result.irow(0), s['1/5/2005']) self.assertEquals(result.irow(1), s['1/10/2005']) result = s.resample('W-THU', how='last') self.assertEquals(len(result), 2) - self.assert_((result.index.dayofweek == [3,3]).all()) + self.assert_((result.index.dayofweek == [3, 3]).all()) self.assertEquals(result.irow(0), s['1/6/2005']) self.assertEquals(result.irow(1), s['1/10/2005']) result = s.resample('W-FRI', how='last') self.assertEquals(len(result), 2) - self.assert_((result.index.dayofweek == [4,4]).all()) + self.assert_((result.index.dayofweek == [4, 4]).all()) self.assertEquals(result.irow(0), s['1/7/2005']) self.assertEquals(result.irow(1), s['1/10/2005']) # to biz day result = s.resample('B', how='last') self.assertEquals(len(result), 7) - self.assert_((result.index.dayofweek == [4,0,1,2,3,4,0]).all()) + self.assert_((result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()) self.assertEquals(result.irow(0), s['1/2/2005']) self.assertEquals(result.irow(1), s['1/3/2005']) self.assertEquals(result.irow(5), s['1/9/2005']) @@ -189,19 +191,22 @@ def test_resample_loffset(self): index=idx + timedelta(minutes=1)) assert_series_equal(result, expected) - expected = s.resample('5min', how='mean', closed='right', label='right', - loffset='1min') + expected = s.resample( + '5min', how='mean', 
closed='right', label='right', + loffset='1min') assert_series_equal(result, expected) - expected = s.resample('5min', how='mean', closed='right', label='right', - loffset=Minute(1)) + expected = s.resample( + '5min', how='mean', closed='right', label='right', + loffset=Minute(1)) assert_series_equal(result, expected) self.assert_(result.index.freq == Minute(5)) # from daily - dti = DatetimeIndex(start=datetime(2005,1,1), end=datetime(2005,1,10), - freq='D') + dti = DatetimeIndex( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), + freq='D') ser = Series(np.random.rand(len(dti)), dti) # to weekly @@ -211,8 +216,9 @@ def test_resample_loffset(self): def test_resample_upsample(self): # from daily - dti = DatetimeIndex(start=datetime(2005,1,1), end=datetime(2005,1,10), - freq='D', name='index') + dti = DatetimeIndex( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), + freq='D', name='index') s = Series(np.random.rand(len(dti)), dti) @@ -255,8 +261,9 @@ def test_resample_ohlc(self): self.assertEquals(xs['close'], s[4]) def test_resample_reresample(self): - dti = DatetimeIndex(start=datetime(2005,1,1), end=datetime(2005,1,10), - freq='D') + dti = DatetimeIndex( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), + freq='D') s = Series(np.random.rand(len(dti)), dti) bs = s.resample('B', closed='right', label='right') result = bs.resample('8H') @@ -388,7 +395,7 @@ def test_resample_anchored_ticks(self): rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s') ts = Series(np.random.randn(len(rng)), index=rng) - ts[:2] = np.nan # so results are the same + ts[:2] = np.nan # so results are the same freqs = ['t', '5t', '15t', '30t', '4h', '12h'] for freq in freqs: @@ -408,7 +415,7 @@ def test_resample_base(self): def test_resample_daily_anchored(self): rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T') ts = Series(np.random.randn(len(rng)), index=rng) - ts[:2] = np.nan # so results are the same + ts[:2] = np.nan # so results are the same result = ts[2:].resample('D', closed='left', label='left') expected = ts.resample('D', closed='left', label='left') @@ -417,7 +424,7 @@ def test_resample_daily_anchored(self): def test_resample_to_period_monthly_buglet(self): # GH #1259 - rng = date_range('1/1/2000','12/31/2000') + rng = date_range('1/1/2000', '12/31/2000') ts = Series(np.random.randn(len(rng)), index=rng) result = ts.resample('M', kind='period') @@ -539,8 +546,8 @@ def test_resample_not_monotonic(self): assert_series_equal(result, exp) def test_resample_median_bug_1688(self): - df = DataFrame([1, 2], index=[datetime(2012,1,1,0,0,0), - datetime(2012,1,1,0,5,0)]) + df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), + datetime(2012, 1, 1, 0, 5, 0)]) result = df.resample("T", how=lambda x: x.mean()) exp = df.asfreq('T') @@ -574,7 +581,7 @@ def test_resample_unequal_times(self): # end hour is less than start end = datetime(2012, 7, 31, 4) bad_ind = date_range(start, end, freq="30min") - df = DataFrame({'close':1}, index=bad_ind) + df = DataFrame({'close': 1}, index=bad_ind) # it works! 
df.resample('AS', 'sum') @@ -584,6 +591,7 @@ def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) + def _simple_pts(start, end, freq='D'): rng = period_range(start, end, freq=freq) return TimeSeries(np.random.randn(len(rng)), index=rng) @@ -687,7 +695,7 @@ def test_upsample_with_limit(self): def test_annual_upsample(self): ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC') - df = DataFrame({'a' : ts}) + df = DataFrame({'a': ts}) rdf = df.resample('D', fill_method='ffill') exp = df['a'].resample('D', fill_method='ffill') assert_series_equal(rdf['a'], exp) @@ -866,9 +874,9 @@ def test_resample_tz_localized(self): result = ts_local.resample('D') # #2245 - idx = date_range('2001-09-20 15:59','2001-09-20 16:00', freq='T', + idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T', tz='Australia/Sydney') - s = Series([1,2], index=idx) + s = Series([1, 2], index=idx) result = s.resample('D', closed='right', label='right') ex_index = date_range('2001-09-21', periods=1, freq='D', @@ -890,12 +898,12 @@ def test_closed_left_corner(self): freq='1min', periods=21)) s[0] = np.nan - result = s.resample('10min', how='mean',closed='left', label='right') - exp = s[1:].resample('10min', how='mean',closed='left', label='right') + result = s.resample('10min', how='mean', closed='left', label='right') + exp = s[1:].resample('10min', how='mean', closed='left', label='right') assert_series_equal(result, exp) - result = s.resample('10min', how='mean',closed='left', label='left') - exp = s[1:].resample('10min', how='mean',closed='left', label='left') + result = s.resample('10min', how='mean', closed='left', label='left') + exp = s[1:].resample('10min', how='mean', closed='left', label='left') ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3) @@ -1004,7 +1012,7 @@ def test_apply_iteration(self): # #2300 N = 1000 ind = pd.date_range(start="2000-01-01", freq="D", periods=N) - df = DataFrame({'open':1, 'close':2}, index=ind) + df = DataFrame({'open': 1, 'close': 2}, index=ind) tg = TimeGrouper('M') grouper = tg.get_grouper(df) @@ -1020,7 +1028,7 @@ def test_apply_iteration(self): def test_panel_aggregation(self): ind = pd.date_range('1/1/2000', periods=100) - data = np.random.randn(2,len(ind),4) + data = np.random.randn(2, len(ind), 4) wp = pd.Panel(data, items=['Item1', 'Item2'], major_axis=ind, minor_axis=['A', 'B', 'C', 'D']) @@ -1037,5 +1045,5 @@ def f(x): if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 01e9197af3ac8..7ecf33d7e781a 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -43,6 +43,7 @@ class TestTimeSeriesDuplicates(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3), @@ -61,7 +62,7 @@ def test_is_unique_monotonic(self): def test_index_unique(self): uniques = self.dups.index.unique() - self.assert_(uniques.dtype == 'M8[ns]') # sanity + self.assert_(uniques.dtype == 'M8[ns]') # sanity # #2563 self.assertTrue(isinstance(uniques, DatetimeIndex)) @@ -74,7 +75,7 @@ def test_index_unique(self): def test_index_dupes_contains(self): d = datetime(2011, 12, 5, 20, 30) - ix=DatetimeIndex([d,d]) + ix = 
DatetimeIndex([d, d]) self.assertTrue(d in ix) def test_duplicate_dates_indexing(self): @@ -159,6 +160,7 @@ def test_indexing_over_size_cutoff(self): finally: _index._SIZE_CUTOFF = old_cutoff + def assert_range_equal(left, right): assert(left.equals(right)) assert(left.freq == right.freq) @@ -174,9 +176,10 @@ def _skip_if_no_pytz(): class TestTimeSeries(unittest.TestCase): _multiprocess_can_split_ = True + def test_dti_slicing(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - dti2 = dti[[1,3,5]] + dti2 = dti[[1, 3, 5]] v1 = dti2[0] v2 = dti2[1] @@ -352,14 +355,14 @@ def test_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_frame_setitem_timestamp(self): - #2155 + # 2155 columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=datetools.bday) index = range(10) data = DataFrame(columns=columns, index=index) t = datetime(2012, 11, 1) ts = Timestamp(t) - data[ts] = np.nan #works + data[ts] = np.nan # works def test_sparse_series_fillna_limit(self): index = np.arange(10) @@ -485,7 +488,7 @@ def test_frame_add_datetime64_col_other_units(self): dtype = np.dtype('M8[%s]' % unit) vals = np.arange(n, dtype=np.int64).view(dtype) - df = DataFrame({'ints' : np.arange(n)}, index=np.arange(n)) + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) df[unit] = vals ex_vals = to_datetime(vals.astype('O')) @@ -494,7 +497,7 @@ def test_frame_add_datetime64_col_other_units(self): self.assert_((df[unit].values == ex_vals).all()) # Test insertion into existing datetime64 column - df = DataFrame({'ints' : np.arange(n)}, index=np.arange(n)) + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) for unit in units: @@ -585,7 +588,6 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') filled = series.fillna(method='bfill') @@ -662,13 +664,13 @@ def test_to_datetime_iso8601(self): exp = Timestamp("2012-01-01 00:00:00") self.assert_(result[0] == exp) - result = to_datetime(['20121001']) # bad iso 8601 + result = to_datetime(['20121001']) # bad iso 8601 exp = Timestamp('2012-10-01') self.assert_(result[0] == exp) def test_to_datetime_default(self): rs = to_datetime('2001') - xp = datetime(2001,1,1) + xp = datetime(2001, 1, 1) self.assert_(rs, xp) def test_nat_vector_field_access(self): @@ -981,7 +983,7 @@ def test_between_time(self): expected = ts.between_time(stime, etime) assert_series_equal(result, expected) - #across midnight + # across midnight rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) stime = time(22, 0) @@ -1041,7 +1043,7 @@ def test_between_time_frame(self): expected = ts.between_time(stime, etime) assert_frame_equal(result, expected) - #across midnight + # across midnight rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(22, 0) @@ -1213,8 +1215,8 @@ def test_astype_object(self): def test_catch_infinite_loop(self): offset = datetools.DateOffset(minute=5) # blow up, don't loop forever - self.assertRaises(Exception, date_range, datetime(2011,11,11), - datetime(2011,11,12), freq=offset) + self.assertRaises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) def test_append_concat(self): rng = date_range('5/8/2012 1:45', periods=10, freq='5T') @@ -1244,9 +1246,9 @@ def test_append_concat(self): def 
test_set_dataframe_column_ns_dtype(self): x = DataFrame([datetime.now(), datetime.now()]) - #self.assert_(x[0].dtype == object) + # self.assert_(x[0].dtype == object) - #x[0] = to_datetime(x[0]) + # x[0] = to_datetime(x[0]) self.assert_(x[0].dtype == np.dtype('M8[ns]')) def test_groupby_count_dateparseerror(self): @@ -1298,9 +1300,9 @@ def test_series_interpolate_method_values(self): def test_frame_datetime64_handling_groupby(self): # it works! - df = DataFrame([(3,np.datetime64('2012-07-03')), - (3,np.datetime64('2012-07-04'))], - columns = ['a', 'date']) + df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) result = df.groupby('a').first() self.assertEqual(result['date'][3], np.datetime64('2012-07-03')) @@ -1388,6 +1390,7 @@ def test_to_csv_numpy_16_bug(self): result = buf.getvalue() self.assert_('2000-01-01' in result) + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) @@ -1395,6 +1398,7 @@ def _simple_ts(start, end, freq='D'): class TestDatetimeIndex(unittest.TestCase): _multiprocess_can_split_ = True + def test_append_join_nondatetimeindex(self): rng = date_range('1/1/2000', periods=10) idx = Index(['a', 'b', 'c', 'd']) @@ -1420,13 +1424,12 @@ def test_to_period_nofreq(self): idx.to_period() def test_000constructor_resolution(self): - #2252 - t1 = Timestamp((1352934390*1000000000)+1000000+1000+1) + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) idx = DatetimeIndex([t1]) self.assert_(idx.nanosecond[0] == t1.nanosecond) - def test_constructor_coverage(self): rng = date_range('1/1/2000', periods=10.5) exp = date_range('1/1/2000', periods=10) @@ -1601,7 +1604,7 @@ def test_map_bug_1677(self): def test_groupby_function_tuple_1677(self): df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) - monthly_group = df.groupby(lambda x: (x.year,x.month)) + monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() self.assert_(isinstance(result.index[0], tuple)) @@ -1636,11 +1639,13 @@ def test_union(self): def test_union_with_DatetimeIndex(self): i1 = Int64Index(np.arange(0, 20, 2)) i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + class TestLegacySupport(unittest.TestCase): _multiprocess_can_split_ = True + @classmethod def setUpClass(cls): if py3compat.PY3: @@ -1843,7 +1848,7 @@ def test_rule_aliases(self): self.assert_(rule == datetools.Micro(10)) def test_slice_year(self): - dti = DatetimeIndex(freq='B', start=datetime(2005,1,1), periods=500) + dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) result = s['2005'] @@ -1862,7 +1867,7 @@ def test_slice_year(self): self.assert_(result == expected) def test_slice_quarter(self): - dti = DatetimeIndex(freq='D', start=datetime(2000,6,1), periods=500) + dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) self.assertEquals(len(s['2001Q1']), 90) @@ -1871,7 +1876,7 @@ def test_slice_quarter(self): self.assertEquals(len(df.ix['1Q01']), 90) def test_slice_month(self): - dti = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500) + dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) s = 
Series(np.arange(len(dti)), index=dti) self.assertEquals(len(s['2005-11']), 30) @@ -1881,7 +1886,7 @@ def test_slice_month(self): assert_series_equal(s['2005-11'], s['11-2005']) def test_partial_slice(self): - rng = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500) + rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-05':'2006-02'] @@ -1902,7 +1907,7 @@ def test_partial_slice(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31') def test_partial_slice_daily(self): - rng = DatetimeIndex(freq='H', start=datetime(2005,1,31), periods=500) + rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-1-31'] @@ -1911,12 +1916,12 @@ def test_partial_slice_daily(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') def test_partial_slice_hourly(self): - rng = DatetimeIndex(freq='T', start=datetime(2005,1,1,20,0,0), + rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-1-1'] - assert_series_equal(result, s.ix[:60*4]) + assert_series_equal(result, s.ix[:60 * 4]) result = s['2005-1-1 20'] assert_series_equal(result, s.ix[:60]) @@ -1925,7 +1930,7 @@ def test_partial_slice_hourly(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') def test_partial_slice_minutely(self): - rng = DatetimeIndex(freq='S', start=datetime(2005,1,1,23,59,0), + rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), periods=500) s = Series(np.arange(len(rng)), index=rng) @@ -1939,7 +1944,7 @@ def test_partial_slice_minutely(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') def test_partial_not_monotonic(self): - rng = date_range(datetime(2005,1,1), periods=20, freq='M') + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') ts = Series(np.arange(len(rng)), index=rng) ts = ts.take(np.random.permutation(20)) @@ -1957,7 +1962,8 @@ def test_date_range_normalize(self): self.assert_(np.array_equal(rng, values)) - rng = date_range('1/1/2000 08:15', periods=n, normalize=False, freq='B') + rng = date_range( + '1/1/2000 08:15', periods=n, normalize=False, freq='B') the_time = time(8, 15) for val in rng: self.assert_(val.time() == the_time) @@ -2029,9 +2035,9 @@ def test_min_max(self): def test_min_max_series(self): rng = date_range('1/1/2000', periods=10, freq='4h') - lvls = ['A','A','A','B','B','B','C','C','C','C'] - df = DataFrame({'TS': rng, 'V' : np.random.randn(len(rng)), - 'L' : lvls}) + lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C'] + df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), + 'L': lvls}) result = df.TS.max() exp = Timestamp(df.TS.iget(-1)) @@ -2044,8 +2050,8 @@ def test_min_max_series(self): self.assertEqual(result, exp) def test_from_M8_structured(self): - dates = [ (datetime(2012, 9, 9, 0, 0), - datetime(2012, 9, 8, 15, 10))] + dates = [(datetime(2012, 9, 9, 0, 0), + datetime(2012, 9, 8, 15, 10))] arr = np.array(dates, dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) df = DataFrame(arr) @@ -2074,10 +2080,10 @@ def test_get_level_values_box(self): def test_frame_apply_dont_convert_datetime64(self): from pandas.tseries.offsets import BDay - df = DataFrame({'x1': [datetime(1996,1,1)]}) + df = DataFrame({'x1': [datetime(1996, 1, 1)]}) - df = df.applymap(lambda x: x+BDay()) - df = df.applymap(lambda x: x+BDay()) + df = df.applymap(lambda x: x + BDay()) + df = 
df.applymap(lambda x: x + BDay()) self.assertTrue(df.x1.dtype == 'M8[ns]') @@ -2128,14 +2134,14 @@ class TestDatetime64(unittest.TestCase): Also test support for datetime64[ns] in Series / DataFrame """ - def setUp(self): - dti = DatetimeIndex(start=datetime(2005,1,1), - end=datetime(2005,1,10), freq='Min') + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') self.series = Series(rand(len(dti)), dti) def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='Q-JAN', start=datetime(1997,12,31), periods=100) + dti = DatetimeIndex( + freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) self.assertEquals(dti.year[0], 1998) self.assertEquals(dti.month[0], 1) @@ -2173,31 +2179,31 @@ def test_nanosecond_field(self): self.assert_(np.array_equal(dti.nanosecond, np.arange(10))) def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997,12,31), + dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) - dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997,12,31), + dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=98) - self.assert_( len(dti1.diff(dti2)) == 2) + self.assert_(len(dti1.diff(dti2)) == 2) def test_fancy_getitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005,1,1), - end=datetime(2010,1,1)) + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) s = Series(np.arange(len(dti)), index=dti) self.assertEquals(s[48], 48) self.assertEquals(s['1/2/2009'], 48) self.assertEquals(s['2009-1-2'], 48) - self.assertEquals(s[datetime(2009,1,2)], 48) - self.assertEquals(s[lib.Timestamp(datetime(2009,1,2))], 48) + self.assertEquals(s[datetime(2009, 1, 2)], 48) + self.assertEquals(s[lib.Timestamp(datetime(2009, 1, 2))], 48) self.assertRaises(KeyError, s.__getitem__, '2009-1-3') assert_series_equal(s['3/6/2009':'2009-06-05'], - s[datetime(2009,3,6):datetime(2009,6,5)]) + s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) def test_fancy_setitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005,1,1), - end=datetime(2010,1,1)) + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) s = Series(np.arange(len(dti)), index=dti) s[48] = -1 @@ -2214,10 +2220,10 @@ def test_datetimeindex_constructor(self): arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] idx1 = DatetimeIndex(arr) - arr = [datetime(2005,1,1), '1/2/2005', '1/3/2005', '2005-01-04'] + arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] idx2 = DatetimeIndex(arr) - arr = [lib.Timestamp(datetime(2005,1,1)), '1/2/2005', '1/3/2005', + arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', '2005-01-04'] idx3 = DatetimeIndex(arr) @@ -2228,7 +2234,8 @@ def test_datetimeindex_constructor(self): arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) idx5 = DatetimeIndex(arr) - arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04']) + arr = to_datetime( + ['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04']) idx6 = DatetimeIndex(arr) idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) @@ -2237,7 +2244,7 @@ def test_datetimeindex_constructor(self): self.assert_(idx7.equals(idx8)) for other in [idx2, idx3, idx4, idx5, idx6]: - self.assert_( (idx1.values == other.values).all() ) + self.assert_((idx1.values == other.values).all()) sdate = datetime(1999, 12, 25) edate = datetime(2000, 1, 1) @@ -2276,17 +2283,17 @@ def test_dti_snap(self): res =
dti.snap(freq='W-MON') exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') exp = exp.repeat([3, 4]) - self.assert_( (res == exp).all() ) + self.assert_((res == exp).all()) res = dti.snap(freq='B') exp = date_range('1/1/2002', '1/7/2002', freq='b') exp = exp.repeat([1, 1, 1, 2, 2]) - self.assert_( (res == exp).all() ) + self.assert_((res == exp).all()) def test_dti_reset_index_round_trip(self): dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') - d1 = DataFrame({'v' : np.random.rand(len(dti))}, index=dti) + d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) d2 = d1.reset_index() self.assert_(d2.dtypes[0] == np.dtype('M8[ns]')) d3 = d2.set_index('index') @@ -2333,7 +2340,7 @@ def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] s = Series(range(100000), times) - s.ix[datetime(1900,1,1):datetime(2100,1,1)] + s.ix[datetime(1900, 1, 1):datetime(2100, 1, 1)] class TestSeriesDatetime64(unittest.TestCase): @@ -2402,13 +2409,13 @@ def test_intercept_astype_object(self): assert_series_equal(result2, expected) df = DataFrame({'a': self.series, - 'b' : np.random.randn(len(self.series))}) + 'b': np.random.randn(len(self.series))}) result = df.values.squeeze() self.assert_((result[:, 0] == expected.values).all()) df = DataFrame({'a': self.series, - 'b' : ['foo'] * len(self.series)}) + 'b': ['foo'] * len(self.series)}) result = df.values.squeeze() self.assert_((result[:, 0] == expected.values).all()) @@ -2419,7 +2426,7 @@ def test_union(self): rng2 = date_range('1/1/1980', '12/1/2001', freq='MS') s2 = Series(np.random.randn(len(rng2)), rng2) - df = DataFrame({'s1' : s1, 's2' : s2}) + df = DataFrame({'s1': s1, 's2': s2}) self.assert_(df.index.values.dtype == np.dtype('M8[ns]')) def test_intersection(self): @@ -2432,7 +2439,7 @@ def test_intersection(self): result = rng.intersection(rng2) self.assert_(result.equals(rng)) - #empty same freq GH2129 + # empty same freq GH2129 rng = date_range('6/1/2000', '6/15/2000', freq='T') result = rng[0:0].intersection(rng) self.assert_(len(result) == 0) @@ -2458,6 +2465,7 @@ def test_string_index_series_name_converted(self): result = df.T['1/3/2000'] self.assertEquals(result.name, df.index[2]) + class TestTimestamp(unittest.TestCase): def test_basics_nanos(self): @@ -2513,7 +2521,7 @@ def test_cant_compare_tz_naive_w_aware(self): self.assertRaises(Exception, b.__lt__, a) self.assertRaises(Exception, b.__gt__, a) - if sys.version_info < (3,3): + if sys.version_info < (3, 3): self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) self.assertRaises(Exception, a.to_pydatetime().__eq__, b) else: @@ -2554,10 +2562,10 @@ def test_frequency_misc(self): self.assertEquals(result, 'H') def test_hash_equivalent(self): - d = {datetime(2011, 1, 1) : 5} + d = {datetime(2011, 1, 1): 5} stamp = Timestamp(datetime(2011, 1, 1)) self.assertEquals(d[stamp], 5) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 58c6b68e3f357..afba12dd02d09 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -43,11 +43,12 @@ def _skip_if_no_pytz(): except ImportError: pass + class FixedOffset(tzinfo): """Fixed offset in minutes east from UTC.""" def __init__(self, offset, name): - self.__offset = timedelta(minutes = offset) + self.__offset 
= timedelta(minutes=offset) self.__name = name def utcoffset(self, dt): @@ -62,8 +63,10 @@ def dst(self, dt): fixed_off = FixedOffset(-420, '-07:00') fixed_off_no_name = FixedOffset(-330, None) + class TestTimeZoneSupport(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): _skip_if_no_pytz() @@ -100,7 +103,7 @@ def test_timestamp_tz_localize(self): self.assertEquals(result, expected) def test_timestamp_to_datetime_tzoffset(self): - #tzoffset + # tzoffset from dateutil.tz import tzoffset tzinfo = tzoffset(None, 7200) expected = Timestamp('3/11/2012 04:00', tz=tzinfo) @@ -143,7 +146,8 @@ def test_tz_localize_dti(self): dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', freq='L') - self.assertRaises(pytz.NonExistentTimeError, dti.tz_localize, 'US/Eastern') + self.assertRaises( + pytz.NonExistentTimeError, dti.tz_localize, 'US/Eastern') def test_tz_localize_empty_series(self): # #2248 @@ -167,7 +171,8 @@ def test_create_with_tz(self): stamp = Timestamp('3/11/2012 05:00', tz='US/Eastern') self.assertEquals(stamp.hour, 5) - rng = date_range('3/11/2012 04:00', periods=10, freq='H', tz='US/Eastern') + rng = date_range( + '3/11/2012 04:00', periods=10, freq='H', tz='US/Eastern') self.assertEquals(stamp, rng[1]) @@ -188,7 +193,8 @@ def test_create_with_fixed_tz(self): rng2 = date_range(start, periods=len(rng), tz=off) self.assert_(rng.equals(rng2)) - rng3 = date_range('3/11/2012 05:00:00+07:00', '6/11/2012 05:00:00+07:00') + rng3 = date_range( + '3/11/2012 05:00:00+07:00', '6/11/2012 05:00:00+07:00') self.assert_((rng.values == rng3.values).all()) def test_create_with_fixedoffset_noname(self): @@ -202,7 +208,8 @@ def test_create_with_fixedoffset_noname(self): self.assertEqual(off, idx.tz) def test_date_range_localize(self): - rng = date_range('3/11/2012 03:00', periods=15, freq='H', tz='US/Eastern') + rng = date_range( + '3/11/2012 03:00', periods=15, freq='H', tz='US/Eastern') rng2 = DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'], tz='US/Eastern') rng3 = date_range('3/11/2012 03:00', periods=15, freq='H') @@ -220,7 +227,8 @@ def test_date_range_localize(self): self.assert_(rng[:2].equals(rng2)) # Right before the DST transition - rng = date_range('3/11/2012 00:00', periods=2, freq='H', tz='US/Eastern') + rng = date_range( + '3/11/2012 00:00', periods=2, freq='H', tz='US/Eastern') rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'], tz='US/Eastern') self.assert_(rng.equals(rng2)) @@ -461,7 +469,7 @@ def test_to_datetime_tzlocal(self): from dateutil.parser import parse from dateutil.tz import tzlocal dt = parse('2012-06-13T01:39:00Z') - dt = dt.replace(tzinfo = tzlocal()) + dt = dt.replace(tzinfo=tzlocal()) arr = np.array([dt], dtype=object) @@ -481,7 +489,8 @@ def test_frame_no_datetime64_dtype(self): def test_hongkong_tz_convert(self): # #1673 - dr = date_range('2012-01-01','2012-01-10',freq = 'D', tz = 'Hongkong') + dr = date_range( + '2012-01-01', '2012-01-10', freq='D', tz='Hongkong') # it works! 
dr.hour @@ -502,7 +511,8 @@ def test_shift_localized(self): self.assert_(result.tz == dr_tz.tz) def test_tz_aware_asfreq(self): - dr = date_range('2011-12-01','2012-07-20',freq = 'D', tz = 'US/Eastern') + dr = date_range( + '2011-12-01', '2012-07-20', freq='D', tz='US/Eastern') s = Series(np.random.randn(len(dr)), index=dr) @@ -543,7 +553,7 @@ def test_convert_datetime_list(self): def test_frame_from_records_utc(self): rec = {'datum': 1.5, - 'begin_time' : datetime(2006, 4, 27, tzinfo=pytz.utc)} + 'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)} # it works DataFrame.from_records([rec], index='begin_time') @@ -575,13 +585,14 @@ def test_getitem_pydatetime_tz(self): tz='Europe/Berlin') ts = Series(index=index, data=index.hour) time_pandas = Timestamp('2012-12-24 17:00', tz='Europe/Berlin') - time_datetime = datetime(2012,12,24,17,00, + time_datetime = datetime(2012, 12, 24, 17, 00, tzinfo=pytz.timezone('Europe/Berlin')) self.assertEqual(ts[time_pandas], ts[time_datetime]) class TestTimeZones(unittest.TestCase): _multiprocess_can_split_ = True + def setUp(self): _skip_if_no_pytz() @@ -672,13 +683,13 @@ def test_join_aware(self): self.assertRaises(Exception, ts.__add__, ts_utc) self.assertRaises(Exception, ts_utc.__add__, ts) - test1 = DataFrame(np.zeros((6,3)), + test1 = DataFrame(np.zeros((6, 3)), index=date_range("2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central")) - test2 = DataFrame(np.zeros((3,3)), + test2 = DataFrame(np.zeros((3, 3)), index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"), - columns=range(3,6)) + columns=range(3, 6)) result = test1.join(test2, how='outer') ex_index = test1.index.union(test2.index) @@ -691,7 +702,7 @@ def test_join_aware(self): freq="H", tz="US/Central") rng2 = date_range("2012-11-15 12:00:00", periods=6, - freq="H", tz="US/Eastern") + freq="H", tz="US/Eastern") result = rng.union(rng2) self.assertTrue(result.tz.zone == 'UTC') @@ -742,9 +753,9 @@ def test_append_aware_naive(self): ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) self.assert_(ts_result.index.equals( - ts1.index.asobject.append(ts2.index.asobject))) + ts1.index.asobject.append(ts2.index.asobject))) - #mixed + # mixed rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') rng2 = range(100) @@ -752,7 +763,7 @@ def test_append_aware_naive(self): ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) self.assert_(ts_result.index.equals( - ts1.index.asobject.append(ts2.index))) + ts1.index.asobject.append(ts2.index))) def test_equal_join_ensure_utc(self): rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') @@ -860,5 +871,5 @@ def test_normalize_tz(self): self.assert_(not rng.is_normalized) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 997400bf73b91..09dad264b7ae0 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -12,6 +12,7 @@ from pandas.tseries.tools import normalize_date from pandas.tseries.util import pivot_annual, isleapyear + class TestPivotAnnual(unittest.TestCase): """ New pandas of scikits.timeseries pivot_annual @@ -38,7 +39,8 @@ def test_daily(self): tm.assert_series_equal(annual[day].dropna(), leaps) def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18* 8760 + 4*24), freq='H') + 
rng_hourly = date_range( + '1/1/1994', periods=(18 * 8760 + 4 * 24), freq='H') data_hourly = np.random.randint(100, 350, rng_hourly.size) ts_hourly = Series(data_hourly, index=rng_hourly) @@ -101,5 +103,5 @@ def test_normalize_date(): assert(result == datetime(2012, 9, 7)) if __name__ == '__main__': - nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index b724e5eb195ab..671769138d21e 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -16,7 +16,7 @@ # raise exception if dateutil 2.0 install on 2.x platform if (sys.version_info[0] == 2 and - dateutil.__version__ == '2.0'): # pragma: no cover + dateutil.__version__ == '2.0'): # pragma: no cover raise Exception('dateutil 2.0 incompatible with Python 2.x, you must ' 'install version 1.5 or 2.1+!') except ImportError: # pragma: no cover @@ -116,7 +116,7 @@ def _convert_f(arg): try: if not arg: return arg - default = datetime(1,1,1) + default = datetime(1, 1, 1) return parse(arg, dayfirst=dayfirst, default=default) except Exception: if errors == 'raise': @@ -167,7 +167,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): arg = arg.upper() default = datetime(1, 1, 1).replace(hour=0, minute=0, - second=0, microsecond=0) + second=0, microsecond=0) # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 if len(arg) in [4, 6]: @@ -240,6 +240,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): return parsed, parsed, reso # datetime, resolution + def dateutil_parse(timestr, default, ignoretz=False, tzinfos=None, **kwargs): @@ -247,7 +248,7 @@ def dateutil_parse(timestr, default, res = DEFAULTPARSER._parse(StringIO(timestr), **kwargs) if res is None: - raise ValueError, "unknown string format" + raise ValueError("unknown string format") repl = {} for attr in ["year", "month", "day", "hour", @@ -261,7 +262,7 @@ def dateutil_parse(timestr, default, ret = default.replace(**repl) if res.weekday is not None and not res.day: - ret = ret+relativedelta.relativedelta(weekday=res.weekday) + ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: if callable(tzinfos) or tzinfos and res.tzname in tzinfos: if callable(tzinfos): @@ -275,8 +276,8 @@ def dateutil_parse(timestr, default, elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) else: - raise ValueError, "offset must be tzinfo subclass, " \ - "tz string, or int offset" + raise ValueError("offset must be tzinfo subclass, " + "tz string, or int offset") ret = ret.replace(tzinfo=tzinfo) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=tz.tzlocal()) @@ -286,6 +287,7 @@ def dateutil_parse(timestr, default, ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) return ret, reso + def _attempt_monthly(val): pats = ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y'] for pat in pats: diff --git a/pandas/util/clipboard.py b/pandas/util/clipboard.py index 4136df072c6b6..bc58af8c0ea3c 100644 --- a/pandas/util/clipboard.py +++ b/pandas/util/clipboard.py @@ -49,7 +49,7 @@ def win32_clipboard_get(): import win32clipboard except ImportError: message = ("Getting text from the clipboard requires the pywin32 " - "extensions: http://sourceforge.net/projects/pywin32/") + "extensions: http://sourceforge.net/projects/pywin32/") raise Exception(message) win32clipboard.OpenClipboard() text = 
win32clipboard.GetClipboardData(win32clipboard.CF_TEXT) @@ -62,7 +62,7 @@ def osx_clipboard_get(): """ Get the clipboard's text on OS X. """ p = subprocess.Popen(['pbpaste', '-Prefer', 'ascii'], - stdout=subprocess.PIPE) + stdout=subprocess.PIPE) text, stderr = p.communicate() # Text comes in with old Mac \r line endings. Change them to \n. text = text.replace('\r', '\n') @@ -80,7 +80,7 @@ def tkinter_clipboard_get(): import Tkinter except ImportError: message = ("Getting text from the clipboard on this platform " - "requires Tkinter.") + "requires Tkinter.") raise Exception(message) root = Tkinter.Tk() root.withdraw() @@ -109,7 +109,7 @@ def osx_clipboard_set(text): """ Get the clipboard's text on OS X. """ p = subprocess.Popen(['pbcopy', '-Prefer', 'ascii'], - stdin=subprocess.PIPE) + stdin=subprocess.PIPE) p.communicate(input=text) diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 24cc28f1193d1..41055f48c2fac 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -34,7 +34,8 @@ class _OrderedDict(dict): # An inherited dict maps keys to values. # The inherited dict provides __getitem__, __len__, __contains__, and get. # The remaining methods are order-aware. - # Big-O running times for all methods are the same as for regular dictionaries. + # Big-O running times for all methods are the same as for regular + # dictionaries. # The internal self.__map dictionary maps keys to links in a doubly linked list. # The circular doubly linked list starts and ends with a sentinel element. @@ -60,7 +61,8 @@ def __init__(self, *args, **kwds): def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 'od.__setitem__(i, y) <==> od[i]=y' # Setting a new item creates a new link which goes at the end of the linked - # list, and the inherited dictionary is updated with the new key/value pair. + # list, and the inherited dictionary is updated with the new key/value + # pair. if key not in self: root = self.__root last = root[0] @@ -70,7 +72,8 @@ def __setitem__(self, key, value, dict_setitem=dict.__setitem__): def __delitem__(self, key, dict_delitem=dict.__delitem__): 'od.__delitem__(y) <==> del od[y]' # Deleting an existing item uses self.__map to find the link which is - # then removed by updating the links in the predecessor and successor nodes. + # then removed by updating the links in the predecessor and successor + # nodes. dict_delitem(self, key) link_prev, link_next, key = self.__map.pop(key) link_prev[1] = link_next @@ -254,7 +257,7 @@ def __eq__(self, other): ''' if isinstance(other, OrderedDict): - return len(self)==len(other) and self.items() == other.items() + return len(self) == len(other) and self.items() == other.items() return dict.__eq__(self, other) def __ne__(self, other): @@ -284,6 +287,7 @@ def viewitems(self): except ImportError: pass + class _Counter(dict): '''Dict subclass for counting hashable objects. Sometimes called a bag or multiset. 
Elements are stored as dictionary keys and their counts @@ -364,7 +368,8 @@ def update(self, iterable=None, **kwds): for elem, count in iterable.iteritems(): self[elem] = self_get(elem, 0) + count else: - dict.update(self, iterable) # fast path when counter is empty + dict.update( + self, iterable)  # fast path when counter is empty else: self_get = self.get for elem in iterable: @@ -465,8 +470,8 @@ def __and__(self, other): result[elem] = newcount return result -if sys.version_info[:2] < (2,7): - OrderedDict=_OrderedDict - Counter=_Counter +if sys.version_info[:2] < (2, 7): + OrderedDict = _OrderedDict + Counter = _Counter else: from collections import OrderedDict, Counter diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index bef3ffc569df1..15ab39d07ec4d 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -46,7 +46,8 @@ def some_function(x): "%s %s wrote the Raven" """ def __init__(self, *args, **kwargs): - assert not (args and kwargs), "Only positional or keyword args are allowed" + assert not ( + args and kwargs), "Only positional or keyword args are allowed" self.params = args or kwargs def __call__(self, func): @@ -173,7 +174,7 @@ def knownfail_decorator(f): def knownfailer(*args, **kwargs): if fail_val(): - raise KnownFailureTest, msg + raise KnownFailureTest(msg) else: return f(*args, **kwargs) return nose.tools.make_decorator(f)(knownfailer) diff --git a/pandas/util/misc.py b/pandas/util/misc.py index 25edfb7453e27..8372ba56d00cd 100644 --- a/pandas/util/misc.py +++ b/pandas/util/misc.py @@ -1,4 +1,3 @@ def exclusive(*args): count = sum([arg is not None for arg in args]) return count == 1 - diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index c72133c80046a..dcc877b094dda 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -38,4 +38,3 @@ def bytes_to_str(b, encoding='ascii'): from io import BytesIO except: from cStringIO import StringIO as BytesIO - diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index 312f54b521e90..fbdb239db30ae 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -27,8 +27,8 @@ def get_terminal_size(): tuple_xy = _get_terminal_size_tput() # needed for Windows' Python in Cygwin's xterm!
if current_os == 'Linux' or \ - current_os == 'Darwin' or \ - current_os.startswith('CYGWIN'): + current_os == 'Darwin' or \ + current_os.startswith('CYGWIN'): tuple_xy = _get_terminal_size_linux() if tuple_xy is None: tuple_xy = (80, 25) # default value @@ -52,7 +52,7 @@ def _get_terminal_size_windows(): if res: import struct (bufx, bufy, curx, cury, wattr, left, top, right, bottom, maxx, - maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) + maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) sizex = right - left + 1 sizey = bottom - top + 1 return sizex, sizey @@ -88,7 +88,8 @@ def ioctl_GWINSZ(fd): import termios import struct import os - cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) + cr = struct.unpack( + 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) except: return None return cr diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 45022353c1ccd..a9a6bab893ac1 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -114,7 +114,7 @@ def assert_almost_equal(a, b): if isinstance(a, (bool, float, int)): if np.isinf(a): - assert np.isinf(b), err_msg(a,b) + assert np.isinf(b), err_msg(a, b) # case for zero elif abs(a) < 1e-5: np.testing.assert_almost_equal( @@ -202,6 +202,7 @@ def assert_panel_equal(left, right, check_panel_type=False): for col in right: assert(col in left) + def assert_panel4d_equal(left, right): assert(left.labels.equals(right.labels)) assert(left.items.equals(right.items)) @@ -215,6 +216,7 @@ def assert_panel4d_equal(left, right): for col in right: assert(col in left) + def assert_contains_all(iterable, dic): for k in iterable: assert(k in dic) @@ -332,9 +334,11 @@ def makePanel(nper=None): data = dict((c, makeTimeDataFrame(nper)) for c in cols) return Panel.fromDict(data) + def makePanel4D(nper=None): - return Panel4D(dict(l1 = makePanel(nper), l2 = makePanel(nper), - l3 = makePanel(nper))) + return Panel4D(dict(l1=makePanel(nper), l2=makePanel(nper), + l3=makePanel(nper))) + def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx_type=None): @@ -362,15 +366,15 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, if ndupe_l is None: ndupe_l = [1] * nentries assert len(ndupe_l) <= nentries - assert names is None or names == False or names == True or len(names) \ - == nlevels + assert (names is None or names is False + or names is True or len(names) == nlevels) assert idx_type is None or \ - (idx_type in ('i', 'f', 's', 'u', 'dt') and nlevels == 1) + (idx_type in ('i', 'f', 's', 'u', 'dt') and nlevels == 1) - if names == True: + if names is True: # build default names names = [prefix + str(i) for i in range(nlevels)] - if names == False: + if names is False: # pass None to index constructor for no name names = None @@ -399,7 +403,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, tuples = [] for i in range(nlevels): - #build a list of lists to create the index from + # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): @@ -409,7 +413,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, result = list(sorted(cnt.elements()))[:nentries] tuples.append(result) - tuples=zip(*tuples) + tuples = zip(*tuples) # convert tuples to index if nentries == 1: @@ -418,6 +422,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, index = MultiIndex.from_tuples(tuples, names=names) return index + def
makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, c_idx_nlevels=1, r_idx_nlevels=1, data_gen_f=None, c_ndupe_l=None, r_ndupe_l=None, dtype=None, @@ -476,9 +481,9 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, assert c_idx_nlevels > 0 assert r_idx_nlevels > 0 assert r_idx_type is None or \ - (r_idx_type in ('i', 'f', 's', 'u', 'dt') and r_idx_nlevels == 1) + (r_idx_type in ('i', 'f', 's', 'u', 'dt') and r_idx_nlevels == 1) assert c_idx_type is None or \ - (c_idx_type in ('i', 'f', 's', 'u', 'dt') and c_idx_nlevels == 1) + (c_idx_type in ('i', 'f', 's', 'u', 'dt') and c_idx_nlevels == 1) columns = makeCustomIndex(ncols, nlevels=c_idx_nlevels, prefix='C', names=c_idx_names, ndupe_l=c_ndupe_l, @@ -489,12 +494,13 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, # by default, generate data based on location if data_gen_f is None: - data_gen_f = lambda r, c: "R%dC%d" % (r,c) + data_gen_f = lambda r, c: "R%dC%d" % (r, c) data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] return DataFrame(data, index, columns, dtype=dtype) + def add_nans(panel): I, J, N = panel.shape for i, item in enumerate(panel.items): @@ -502,11 +508,13 @@ def add_nans(panel): for j, col in enumerate(dm.columns): dm[col][:i + j] = np.NaN + def add_nans_panel4d(panel4d): for l, label in enumerate(panel4d.labels): panel = panel4d[label] add_nans(panel) + class TestSubDict(dict): def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) diff --git a/scripts/bench_join.py b/scripts/bench_join.py index d838cf9387269..be24dac810aee 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -9,10 +9,11 @@ pct_overlap = 0.2 a = np.arange(n, dtype=np.int64) -b = np.arange(n * pct_overlap, n*(1+pct_overlap), dtype=np.int64) +b = np.arange(n * pct_overlap, n * (1 + pct_overlap), dtype=np.int64) dr1 = DateRange('1/1/2000', periods=n, offset=datetools.Minute()) -dr2 = DateRange(dr1[int(pct_overlap*n)], periods=n, offset=datetools.Minute(2)) +dr2 = DateRange( + dr1[int(pct_overlap * n)], periods=n, offset=datetools.Minute(2)) aobj = a.astype(object) bobj = b.astype(object) @@ -29,11 +30,13 @@ a_frame = DataFrame(avf, index=a, columns=range(K)) b_frame = DataFrame(bvf, index=b, columns=range(K, 2 * K)) + def do_left_join(a, b, av, bv): out = np.empty((len(a), 2)) lib.left_join_1d(a, b, av, bv, out) return out + def do_outer_join(a, b, av, bv): result_index, aindexer, bindexer = lib.outer_join_indexer(a, b) result = np.empty((2, len(result_index))) @@ -41,6 +44,7 @@ def do_outer_join(a, b, av, bv): lib.take_1d(bv, bindexer, result[1]) return result_index, result + def do_inner_join(a, b, av, bv): result_index, aindexer, bindexer = lib.inner_join_indexer(a, b) result = np.empty((2, len(result_index))) @@ -53,6 +57,7 @@ def do_inner_join(a, b, av, bv): from pandas.util.testing import set_trace + def do_left_join_python(a, b, av, bv): indexer, mask = lib.ordered_left_join_int64(a, b) @@ -68,12 +73,14 @@ def do_left_join_python(a, b, av, bv): np.putmask(bchunk, np.tile(mask, bk), np.nan) return result + def _take_multi(data, indexer, out): if not data.flags.c_contiguous: data = data.copy() for i in xrange(data.shape[0]): data[i].take(indexer, out=out[i]) + def do_left_join_multi(a, b, av, bv): n, ak = av.shape _, bk = bv.shape @@ -81,6 +88,7 @@ def do_left_join_multi(a, b, av, bv): lib.left_join_2d(a, b, av, bv, result) return result + def do_outer_join_multi(a, b, av, bv): n, ak = av.shape _, bk = bv.shape @@ -92,6 +100,7 @@ 
def do_outer_join_multi(a, b, av, bv): # lib.take_axis0(bv, lindexer, out=result[ak:].T) return result_index, result + def do_inner_join_multi(a, b, av, bv): n, ak = av.shape _, bk = bv.shape @@ -103,6 +112,7 @@ def do_inner_join_multi(a, b, av, bv): # lib.take_axis0(bv, lindexer, out=result[ak:].T) return result_index, result + def do_left_join_multi_v2(a, b, av, bv): indexer, mask = lib.ordered_left_join_int64(a, b) bv_taken = bv.take(indexer, axis=0) @@ -113,6 +123,7 @@ def do_left_join_multi_v2(a, b, av, bv): def do_left_join_series(a, b): return b.reindex(a.index) + def do_left_join_frame(a, b): a.index._indexMap = None b.index._indexMap = None @@ -125,14 +136,16 @@ def do_left_join_frame(a, b): out = np.empty((10, 120000)) + def join(a, b, av, bv, how="left"): - func_dict = {'left' : do_left_join_multi, - 'outer' : do_outer_join_multi, - 'inner' : do_inner_join_multi} + func_dict = {'left': do_left_join_multi, + 'outer': do_outer_join_multi, + 'inner': do_inner_join_multi} f = func_dict[how] return f(a, b, av, bv) + def bench_python(n=100000, pct_overlap=0.20, K=1): import gc ns = [2, 3, 4, 5, 6] @@ -142,7 +155,7 @@ def bench_python(n=100000, pct_overlap=0.20, K=1): all_results = {} for logn in ns: - n = 10**logn + n = 10 ** logn a = np.arange(n, dtype=np.int64) b = np.arange(n * pct_overlap, n * pct_overlap + n, dtype=np.int64) @@ -171,6 +184,7 @@ def bench_python(n=100000, pct_overlap=0.20, K=1): return DataFrame(all_results, index=kinds) + def bench_xts(n=100000, pct_overlap=0.20): from pandas.rpy.common import r r('a <- 5') @@ -194,4 +208,3 @@ def bench_xts(n=100000, pct_overlap=0.20): elapsed = r('as.list(system.time(%s, gcFirst=F))$elapsed' % stmt)[0] result[kind] = (elapsed / iterations) * 1000 return Series(result) - diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index a0babaf59b990..cdac37f289bb8 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -12,19 +12,21 @@ zipped = izip(key1, key2) + def _zip(*args): arr = np.empty(N, dtype=object) arr[:] = zip(*args) return arr + def _zip2(*args): return lib.list_to_object_array(zip(*args)) index = MultiIndex.from_arrays([key1, key2]) -to_join = DataFrame({'j1' : np.random.randn(100000)}, index=index) +to_join = DataFrame({'j1': np.random.randn(100000)}, index=index) -data = DataFrame({'A' : np.random.randn(500000), - 'key1' : np.repeat(key1, 5), - 'key2' : np.repeat(key2, 5)}) +data = DataFrame({'A': np.random.randn(500000), + 'key1': np.repeat(key1, 5), + 'key2': np.repeat(key2, 5)}) # data.join(to_join, on=['key1', 'key2']) diff --git a/scripts/bench_refactor.py b/scripts/bench_refactor.py index 5ae36f7ddd058..3d0c7e40ced7d 100644 --- a/scripts/bench_refactor.py +++ b/scripts/bench_refactor.py @@ -11,6 +11,7 @@ N = 1000 K = 500 + def horribly_unconsolidated(): index = np.arange(N) @@ -21,16 +22,19 @@ def horribly_unconsolidated(): return df + def bench_reindex_index(df, it=100): new_idx = np.arange(0, N, 2) for i in xrange(it): df.reindex(new_idx) + def bench_reindex_columns(df, it=100): new_cols = np.arange(0, K, 2) for i in xrange(it): df.reindex(columns=new_cols) + def bench_join_index(df, it=10): left = df.reindex(index=np.arange(0, N, 2), columns=np.arange(K // 2)) diff --git a/scripts/faster_xs.py b/scripts/faster_xs.py index a539642b78185..2bb6271124c4f 100644 --- a/scripts/faster_xs.py +++ b/scripts/faster_xs.py @@ -13,4 +13,3 @@ blocks = df._data.blocks items = df.columns - diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py index edbd23c80533d..8720730d2bb10 
100644 --- a/scripts/file_sizes.py +++ b/scripts/file_sizes.py @@ -17,9 +17,11 @@ loc = '.' walked = os.walk(loc) + def _should_count_file(path): return path.endswith('.py') or path.endswith('.pyx') + def _is_def_line(line): """def/cdef/cpdef, but not `cdef class`""" return (line.endswith(':') and not 'class' in line.split() and @@ -28,6 +30,7 @@ def _is_def_line(line): line.startswith('cpdef ') or ' def ' in line or ' cdef ' in line or ' cpdef ' in line)) + class LengthCounter(object): """ should add option for subtracting nested function lengths?? @@ -100,15 +103,18 @@ def _push_count(self, start_pos): self.counts.append(len(func_lines)) + def _get_indent_level(line): level = 0 while line.startswith(' ' * level): level += 1 return level + def _is_triplequote(line): return line.startswith('"""') or line.startswith("'''") + def _get_file_function_lengths(path): lines = [x.rstrip() for x in open(path).readlines()] counter = LengthCounter(lines) @@ -145,6 +151,7 @@ def x(): result = counter.get_counts() assert(result == expected) + def doit(): for directory, _, files in walked: print directory @@ -160,8 +167,9 @@ def doit(): names.append(path) lengths.append(lines) - result = DataFrame({'dirs' : dirs, 'names' : names, - 'lengths' : lengths}) + result = DataFrame({'dirs': dirs, 'names': names, + 'lengths': lengths}) + def doit2(): counts = {} diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py index 4c12a43320923..c2ebbc88ed580 100644 --- a/scripts/gen_release_notes.py +++ b/scripts/gen_release_notes.py @@ -15,9 +15,10 @@ def __eq__(self, other): return self.number == other.number return False + class Issue(object): - def __init__(self, title, labels, number, milestone, body, state): + def __init__(self, title, labels, number, milestone, body, state): self.title = title self.labels = set([x['name'] for x in labels]) self.number = number @@ -30,6 +31,7 @@ def __eq__(self, other): return self.number == other.number return False + def get_issues(): all_issues = [] page_number = 1 @@ -41,6 +43,7 @@ def get_issues(): all_issues.extend(iss) return all_issues + def _get_page(page_number): gh_url = ('https://api.github.com/repos/pydata/pandas/issues?' 
'milestone=*&state=closed&assignee=*&page=%d') % page_number @@ -52,11 +55,13 @@ def _get_page(page_number): for x in jsondata] return issues + def get_milestone(data): if data is None: return None return Milestone(data['title'], data['number']) + def collate_label(issues, label): lines = [] for x in issues: @@ -65,6 +70,7 @@ def collate_label(issues, label): return '\n'.join(lines) + def release_notes(milestone): issues = get_issues() diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py index 63638ede83097..8685b2bbe8ff7 100644 --- a/scripts/groupby_sample.py +++ b/scripts/groupby_sample.py @@ -4,42 +4,46 @@ g1 = np.array(list(string.letters))[:-1] g2 = np.arange(510) -df_small = DataFrame({'group1' : ["a","b","a","a","b","c","c","c","c", - "c","a","a","a","b","b","b","b"], - 'group2' : [1,2,3,4,1,3,5,6,5,4,1,2,3,4,3,2,1], - 'value' : ["apple","pear","orange","apple", - "banana","durian","lemon","lime", - "raspberry","durian","peach","nectarine", - "banana","lemon","guava","blackberry", - "grape"]}) +df_small = DataFrame({'group1': ["a", "b", "a", "a", "b", "c", "c", "c", "c", + "c", "a", "a", "a", "b", "b", "b", "b"], + 'group2': [1, 2, 3, 4, 1, 3, 5, 6, 5, 4, 1, 2, 3, 4, 3, 2, 1], + 'value': ["apple", "pear", "orange", "apple", + "banana", "durian", "lemon", "lime", + "raspberry", "durian", "peach", "nectarine", + "banana", "lemon", "guava", "blackberry", + "grape"]}) value = df_small['value'].values.repeat(3) -df = DataFrame({'group1' : g1.repeat(4000 * 5), - 'group2' : np.tile(g2, 400 * 5), - 'value' : value.repeat(4000 * 5)}) +df = DataFrame({'group1': g1.repeat(4000 * 5), + 'group2': np.tile(g2, 400 * 5), + 'value': value.repeat(4000 * 5)}) def random_sample(): - grouped = df.groupby(['group1','group2'])['value'] + grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) indices = grouped.apply(choose) return df.reindex(indices) + def random_sample_v2(): - grouped = df.groupby(['group1','group2'])['value'] + grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) indices = [choice(v) for k, v in grouped.groups.iteritems()] return df.reindex(indices) + def do_shuffle(arr): - from random import shuffle - result = arr.copy().values - shuffle(result) - return result + from random import shuffle + result = arr.copy().values + shuffle(result) + return result + -def shuffle_uri(df,grouped): - perm = np.r_[tuple([np.random.permutation(idxs) for idxs in grouped.groups.itervalues()])] +def shuffle_uri(df, grouped): + perm = np.r_[tuple([np.random.permutation( + idxs) for idxs in grouped.groups.itervalues()])] df['state_permuted'] = np.asarray(df.ix[perm]['value']) df2 = df.copy() diff --git a/scripts/groupby_speed.py b/scripts/groupby_speed.py index c0fa44957d149..a25b00206733d 100644 --- a/scripts/groupby_speed.py +++ b/scripts/groupby_speed.py @@ -9,23 +9,26 @@ gp = rng5.asof grouped = df.groupby(gp) + def get1(dt): k = gp(dt) return grouped.get_group(k) + def get2(dt): k = gp(dt) return df.ix[grouped.groups[k]] + def f(): for i, date in enumerate(df.index): if i % 10000 == 0: print i get1(date) + def g(): for i, date in enumerate(df.index): if i % 10000 == 0: print i get2(date) - diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index 6e4177e2fb0f1..76c9cb0cb3bc5 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -105,7 +105,8 @@ # f = lambda x: x -# transformed = df.groupby(lambda x: 
x.strftime('%m/%y')).transform(lambda x: x) +# transformed = df.groupby(lambda x: x.strftime('%m/%y')).transform(lambda +# x: x) # def ohlc(group): # return Series([group[0], group.max(), group.min(), group[-1]], @@ -133,10 +134,11 @@ b = np.tile(np.arange(100), 100) index = MultiIndex.from_arrays([a, b]) s = Series(np.random.randn(len(index)), index) -df = DataFrame({'A' : s}) +df = DataFrame({'A': s}) df['B'] = df.index.get_level_values(0) df['C'] = df.index.get_level_values(1) + def f(): for x in df.groupby(['B', 'B']): pass diff --git a/scripts/parser_magic.py b/scripts/parser_magic.py index 4eec900b880dd..c35611350988c 100644 --- a/scripts/parser_magic.py +++ b/scripts/parser_magic.py @@ -6,10 +6,12 @@ import inspect import sys + def merge(a, b): f, args, _ = parse_stmt(inspect.currentframe().f_back) - return DataFrame({args[0] : a, - args[1] : b}) + return DataFrame({args[0]: a, + args[1]: b}) + def parse_stmt(frame): info = inspect.getframeinfo(frame) @@ -22,6 +24,7 @@ def parse_stmt(frame): call = body return _parse_call(call) + def _parse_call(call): func = _maybe_format_attribute(call.func) @@ -35,6 +38,7 @@ def _parse_call(call): return func, str_args, {} + def _format_call(call): func, args, kwds = _parse_call(call) content = '' @@ -49,11 +53,13 @@ def _format_call(call): content += joined_kwds return '%s(%s)' % (func, content) + def _maybe_format_attribute(name): if isinstance(name, ast.Attribute): return _format_attribute(name) return name.id + def _format_attribute(attr): obj = attr.value if isinstance(attr.value, ast.Attribute): diff --git a/scripts/preepoch_test.py b/scripts/preepoch_test.py index b65f09c8172ff..59066ba832cd0 100644 --- a/scripts/preepoch_test.py +++ b/scripts/preepoch_test.py @@ -1,20 +1,21 @@ import numpy as np from pandas import * + def panda_test(): # generate some data - data = np.random.rand(50,5) + data = np.random.rand(50, 5) # generate some dates - dates = DateRange('1/1/1969',periods=50) + dates = DateRange('1/1/1969', periods=50) # generate column headings - cols = ['A','B','C','D','E'] + cols = ['A', 'B', 'C', 'D', 'E'] - df = DataFrame(data,index=dates,columns=cols) + df = DataFrame(data, index=dates, columns=cols) # save to HDF5Store store = HDFStore('bugzilla.h5', mode='w') - store['df'] = df # This gives: OverflowError: mktime argument out of range + store['df'] = df # This gives: OverflowError: mktime argument out of range store.close() diff --git a/scripts/pypistats.py b/scripts/pypistats.py index 6054c356f8c0b..e64be63551fde 100644 --- a/scripts/pypistats.py +++ b/scripts/pypistats.py @@ -14,6 +14,7 @@ locale.setlocale(locale.LC_ALL, '') + class PyPIDownloadAggregator(object): def __init__(self, package_name, include_hidden=True): @@ -30,7 +31,8 @@ def releases(self): self.include_hidden) if len(result) == 0: - # no matching package--search for possibles, and limit to 15 results + # no matching package--search for possibles, and limit to 15 + # results results = self.proxy.search({ 'name': self.package_name, 'description': self.package_name diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py index b7e412390a22f..6441a69f3a8bf 100644 --- a/scripts/roll_median_leak.py +++ b/scripts/roll_median_leak.py @@ -20,5 +20,5 @@ for _ in xrange(10000): print proc.get_memory_info() - sdf = SparseDataFrame({'A' : lst.to_array()}) + sdf = SparseDataFrame({'A': lst.to_array()}) chunk = sdf[sdf['A'] == 5] diff --git a/scripts/runtests.py b/scripts/runtests.py index 7816ac25db9d2..b995db65ac591 100644 --- a/scripts/runtests.py +++ 
b/scripts/runtests.py @@ -1,3 +1,4 @@ -import os; print os.getpid() +import os +print os.getpid() import nose nose.main('pandas.core') diff --git a/scripts/testmed.py b/scripts/testmed.py index 1184fee82efd3..ed0f76cd2f3fb 100644 --- a/scripts/testmed.py +++ b/scripts/testmed.py @@ -3,11 +3,14 @@ from random import random from math import log, ceil + class Node(object): __slots__ = 'value', 'next', 'width' + def __init__(self, value, next, width): self.value, self.next, self.width = value, next, width + class End(object): 'Sentinel object that always compares greater than another object' def __cmp__(self, other): @@ -15,13 +18,14 @@ def __cmp__(self, other): NIL = Node(End(), [], []) # Singleton terminator node + class IndexableSkiplist: 'Sorted collection supporting O(lg n) insertion, removal, and lookup by rank.' def __init__(self, expected_size=100): self.size = 0 self.maxlevels = int(1 + log(expected_size, 2)) - self.head = Node('HEAD', [NIL]*self.maxlevels, [1]*self.maxlevels) + self.head = Node('HEAD', [NIL] * self.maxlevels, [1] * self.maxlevels) def __len__(self): return self.size @@ -48,7 +52,7 @@ def insert(self, value): # insert a link to the newnode at each level d = min(self.maxlevels, 1 - int(log(random(), 2.0))) - newnode = Node(value, [None]*d, [None]*d) + newnode = Node(value, [None] * d, [None] * d) steps = 0 for level in range(d): prevnode = chain[level] @@ -92,6 +96,7 @@ def __iter__(self): from collections import deque from itertools import islice + class RunningMedian: 'Fast running median with O(lg n) updates where n is the window size' @@ -121,6 +126,7 @@ def __iter__(self): import time + def test(): from numpy.random import randn @@ -135,12 +141,14 @@ def _test(arr, k): from numpy.random import randn from pandas.lib.skiplist import rolling_median + def test2(): arr = randn(N) return rolling_median(arr, K) + def runmany(f, arr, arglist): timings = [] @@ -152,6 +160,7 @@ def runmany(f, arr, arglist): return timings + def _time(f, *args): _start = time.clock() result = f(*args) diff --git a/scripts/touchup_gh_issues.py b/scripts/touchup_gh_issues.py index 924a236f629f6..96ee220f55a02 100755 --- a/scripts/touchup_gh_issues.py +++ b/scripts/touchup_gh_issues.py @@ -14,8 +14,10 @@ pat = "((?:\s*GH\s*)?)#(\d{3,4})([^_]|$)?" rep_pat = r"\1GH\2_\3" -anchor_pat =".. _GH{id}: https://github.com/pydata/pandas/issues/{id}" +anchor_pat = ".. _GH{id}: https://github.com/pydata/pandas/issues/{id}" section_pat = "^pandas\s[\d\.]+\s*$" + + def main(): issues = OrderedDict() while True: @@ -24,19 +26,19 @@ def main(): if not line: break - if re.search(section_pat,line): + if re.search(section_pat, line): for id in issues: print(anchor_pat.format(id=id).rstrip()) if issues: print("\n") issues = OrderedDict() - for m in re.finditer(pat,line): + for m in re.finditer(pat, line): id = m.group(2) if id not in issues: issues[id] = True - print(re.sub(pat, rep_pat,line).rstrip()) + print(re.sub(pat, rep_pat, line).rstrip()) pass if __name__ == "__main__": - main() + main() diff --git a/setup.py b/setup.py index 8b9b6665b381f..eeb6cd3e871a8 100755 --- a/setup.py +++ b/setup.py @@ -12,18 +12,18 @@ import warnings try: - BUILD_CACHE_DIR=None + BUILD_CACHE_DIR = None # uncomment to activate the build cache - #BUILD_CACHE_DIR="/tmp/.pandas_build_cache/" + # BUILD_CACHE_DIR="/tmp/.pandas_build_cache/" if os.isdir(BUILD_CACHE_DIR): print("--------------------------------------------------------") print("BUILD CACHE ACTIVATED. 
be careful, this is experimental.") print("--------------------------------------------------------") else: - BUILD_CACHE_DIR=None -except : + BUILD_CACHE_DIR = None +except: pass # may need to work around setuptools bug by providing a fake Pyrex @@ -52,7 +52,7 @@ if sys.version_info[0] >= 3: min_numpy_ver = 1.6 - if sys.version_info[1] >= 3: # 3.3 needs numpy 1.7+ + if sys.version_info[1] >= 3: # 3.3 needs numpy 1.7+ min_numpy_ver = "1.7.0b2" setuptools_kwargs = {'use_2to3': True, @@ -61,18 +61,18 @@ 'pytz', 'numpy >= %s' % min_numpy_ver], 'use_2to3_exclude_fixers': ['lib2to3.fixes.fix_next', - ], - } + ], + } if not _have_setuptools: sys.exit("need setuptools/distribute for Py3k" - "\n$ pip install distribute") + "\n$ pip install distribute") else: setuptools_kwargs = { 'install_requires': ['python-dateutil', 'pytz', 'numpy >= 1.6.1'], - 'zip_safe' : False, + 'zip_safe': False, } if not _have_setuptools: @@ -89,7 +89,7 @@ import numpy as np except ImportError: nonumpy_msg = ("# numpy needed to finish setup. run:\n\n" - " $ pip install numpy # or easy_install numpy\n") + " $ pip install numpy # or easy_install numpy\n") sys.exit(nonumpy_msg) if np.__version__ < '1.6.1': @@ -103,10 +103,10 @@ try: from Cython.Distutils import build_ext - #from Cython.Distutils import Extension # to get pyrex debugging symbols - cython=True + # from Cython.Distutils import Extension # to get pyrex debugging symbols + cython = True except ImportError: - cython=False + cython = False from os.path import splitext, basename, join as pjoin @@ -215,8 +215,9 @@ stdout=subprocess.PIPE).stdout except OSError: # msysgit compatibility - pipe = subprocess.Popen(["git.cmd", "rev-parse", "--short", "HEAD"], - stdout=subprocess.PIPE).stdout + pipe = subprocess.Popen( + ["git.cmd", "rev-parse", "--short", "HEAD"], + stdout=subprocess.PIPE).stdout rev = pipe.read().strip() # makes distutils blow up on Python 2.7 if sys.version_info[0] >= 3: @@ -228,13 +229,15 @@ else: FULLVERSION += QUALIFIER + def write_version_py(filename=None): cnt = """\ version = '%s' short_version = '%s' """ if not filename: - filename = os.path.join(os.path.dirname(__file__), 'pandas', 'version.py') + filename = os.path.join( + os.path.dirname(__file__), 'pandas', 'version.py') a = open(filename, 'w') try: @@ -242,10 +245,11 @@ def write_version_py(filename=None): finally: a.close() + class CleanCommand(Command): """Custom distutils command to clean the .so and .pyc files.""" - user_options = [("all", "a", "") ] + user_options = [("all", "a", "")] def initialize_options(self): self.all = True @@ -287,6 +291,7 @@ def run(self): except Exception: pass + class CheckSDist(sdist): """Custom sdist that ensures Cython has compiled all pyx files to c.""" @@ -314,20 +319,21 @@ def run(self): self.run_command('cython') else: for pyxfile in self._pyxfiles: - cfile = pyxfile[:-3]+'c' - msg = "C-source file '%s' not found."%(cfile)+\ - " Run 'setup.py cython' before sdist." + cfile = pyxfile[:-3] + 'c' + msg = "C-source file '%s' not found." % (cfile) +\ + " Run 'setup.py cython' before sdist." assert os.path.isfile(cfile), msg sdist.run(self) + class CheckingBuildExt(build_ext): """Subclass build_ext to get clearer report if Cython is necessary.""" def check_cython_extensions(self, extensions): for ext in extensions: - for src in ext.sources: - if not os.path.exists(src): - raise Exception("""Cython-generated file '%s' not found. + for src in ext.sources: + if not os.path.exists(src): + raise Exception("""Cython-generated file '%s' not found. 
Cython is required to compile pandas from a development branch. Please install Cython or download a release package of pandas. """ % src) @@ -339,39 +345,41 @@ def build_extensions(self): for ext in self.extensions: self.build_extension(ext) + class CompilationCacheMixin(object): - def __init__(self,*args,**kwds): - cache_dir=kwds.pop("cache_dir",BUILD_CACHE_DIR) - self.cache_dir=cache_dir + def __init__(self, *args, **kwds): + cache_dir = kwds.pop("cache_dir", BUILD_CACHE_DIR) + self.cache_dir = cache_dir if not os.path.isdir(cache_dir): - raise Exception("Error: path to Cache directory (%s) is not a dir" % cache_dir); + raise Exception("Error: path to Cache directory (%s) is not a dir" % cache_dir) - def _copy_from_cache(self,hash,target): - src=os.path.join(self.cache_dir,hash) + def _copy_from_cache(self, hash, target): + src = os.path.join(self.cache_dir, hash) if os.path.exists(src): - # print("Cache HIT: asked to copy file %s in %s" % (src,os.path.abspath(target))) - s="." + # print("Cache HIT: asked to copy file %s in %s" % + # (src,os.path.abspath(target))) + s = "." for d in target.split(os.path.sep)[:-1]: - s=os.path.join(s,d) + s = os.path.join(s, d) if not os.path.exists(s): os.mkdir(s) - shutil.copyfile(src,target) + shutil.copyfile(src, target) return True return False - def _put_to_cache(self,hash,src): - target=os.path.join(self.cache_dir,hash) + def _put_to_cache(self, hash, src): + target = os.path.join(self.cache_dir, hash) # print( "Cache miss: asked to copy file from %s to %s" % (src,target)) - s="." + s = "." for d in target.split(os.path.sep)[:-1]: - s=os.path.join(s,d) + s = os.path.join(s, d) if not os.path.exists(s): os.mkdir(s) - shutil.copyfile(src,target) + shutil.copyfile(src, target) - def _hash_obj(self,obj): + def _hash_obj(self, obj): """ you should override this method to provide a sensible implementation of hashing functions for your intended objects @@ -390,7 +398,7 @@ def get_ext_fullpath(self, ext_name): """ import string # makes sure the extension name is only using dots - all_dots = string.maketrans('/'+os.sep, '..') + all_dots = string.maketrans('/' + os.sep, '..') ext_name = ext_name.translate(all_dots) fullname = self.get_ext_fullname(ext_name) @@ -402,7 +410,7 @@ def get_ext_fullpath(self, ext_name): # no further work needed # returning : # build_dir/package/path/filename - filename = os.path.join(*modpath[:-1]+[filename]) + filename = os.path.join(*modpath[:-1] + [filename]) return os.path.join(self.build_lib, filename) # the inplace option requires to find the package directory @@ -415,23 +423,24 @@ def get_ext_fullpath(self, ext_name): # package_dir/filename return os.path.join(package_dir, filename) + class CompilationCacheExtMixin(CompilationCacheMixin): - def __init__(self,*args,**kwds): - CompilationCacheMixin.__init__(self,*args,**kwds) + def __init__(self, *args, **kwds): + CompilationCacheMixin.__init__(self, *args, **kwds) - def _hash_file(self,fname): + def _hash_file(self, fname): from hashlib import sha1 try: - hash=sha1() + hash = sha1() hash.update(self.build_lib.encode('utf-8')) try: if sys.version_info[0] >= 3: import io - f=io.open(fname,"rb") + f = io.open(fname, "rb") else: - f=open(fname) + f = open(fname) - first_line=f.readline() + first_line = f.readline() # ignore cython generation timestamp header if "Generated by Cython" not in first_line.decode('utf-8'): hash.update(first_line) @@ -447,21 +456,21 @@ def _hash_file(self,fname): except IOError: return None - def _hash_obj(self,ext): + def _hash_obj(self, ext): from 
hashlib import sha1 sources = ext.sources - if sources is None or \ - (not hasattr(sources,'__iter__') ) or \ - isinstance(sources,str) or \ - sys.version[0]==2 and isinstance(sources,unicode): #argh + if (sources is None or + (not hasattr(sources, '__iter__')) or + isinstance(sources, str) or + sys.version[0] == 2 and isinstance(sources, unicode)): # argh return False sources = list(sources) + ext.depends - hash=sha1() + hash = sha1() try: for fname in sources: - fhash=self._hash_file(fname) + fhash = self._hash_file(fname) if fhash: hash.update(fhash.encode('utf-8')) except: @@ -469,53 +478,54 @@ def _hash_obj(self,ext): return hash.hexdigest() -class CachingBuildExt(build_ext,CompilationCacheExtMixin): - def __init__(self,*args,**kwds): - CompilationCacheExtMixin.__init__(self,*args,**kwds) - kwds.pop("cache_dir",None) - build_ext.__init__(self,*args,**kwds) - def build_extension(self, ext,*args,**kwds): +class CachingBuildExt(build_ext, CompilationCacheExtMixin): + def __init__(self, *args, **kwds): + CompilationCacheExtMixin.__init__(self, *args, **kwds) + kwds.pop("cache_dir", None) + build_ext.__init__(self, *args, **kwds) + + def build_extension(self, ext, *args, **kwds): ext_path = self.get_ext_fullpath(ext.name) - build_path = os.path.join(self.build_lib,os.path.basename(ext_path)) + build_path = os.path.join(self.build_lib, os.path.basename(ext_path)) - hash=self._hash_obj(ext) - if hash and self._copy_from_cache(hash,ext_path): + hash = self._hash_obj(ext) + if hash and self._copy_from_cache(hash, ext_path): return - build_ext.build_extension(self,ext,*args,**kwds) + build_ext.build_extension(self, ext, *args, **kwds) - hash=self._hash_obj(ext) + hash = self._hash_obj(ext) if os.path.exists(build_path): - self._put_to_cache(hash,build_path) # build_ext + self._put_to_cache(hash, build_path) # build_ext if os.path.exists(ext_path): - self._put_to_cache(hash,ext_path) # develop - + self._put_to_cache(hash, ext_path) # develop def cython_sources(self, sources, extension): import re cplus = self.cython_cplus or getattr(extension, 'cython_cplus', 0) or \ - (extension.language and extension.language.lower() == 'c++') + (extension.language and extension.language.lower() == 'c++') target_ext = '.c' if cplus: target_ext = '.cpp' - for i,s in enumerate(sources): - if not re.search("\.(pyx|pxi|pxd)$",s): + for i, s in enumerate(sources): + if not re.search("\.(pyx|pxi|pxd)$", s): continue - ext_dir=os.path.dirname(s) - ext_basename=re.sub("\.[^\.]+$","",os.path.basename(s)) - ext_basename += target_ext - target= os.path.join(ext_dir,ext_basename) - hash=self._hash_file(s) - sources[i]=target - if hash and self._copy_from_cache(hash,target): + ext_dir = os.path.dirname(s) + ext_basename = re.sub("\.[^\.]+$", "", os.path.basename(s)) + ext_basename += target_ext + target = os.path.join(ext_dir, ext_basename) + hash = self._hash_file(s) + sources[i] = target + if hash and self._copy_from_cache(hash, target): continue - build_ext.cython_sources(self,[s],extension) - self._put_to_cache(hash,target) + build_ext.cython_sources(self, [s], extension) + self._put_to_cache(hash, target) return sources + class CythonCommand(build_ext): """Custom distutils command subclassed from Cython.Distutils.build_ext to compile pyx->c, and stop there. All this does is override the @@ -523,14 +533,18 @@ class CythonCommand(build_ext): def build_extension(self, ext): pass + class DummyBuildSrc(Command): """ numpy's build_src command interferes with Cython's build_ext. 
""" user_options = [] + def initialize_options(self): self.py_modules_dict = {} + def finalize_options(self): pass + def run(self): pass @@ -541,18 +555,19 @@ def run(self): if cython: suffix = '.pyx' cmdclass['build_ext'] = build_ext - if BUILD_CACHE_DIR: # use the cache + if BUILD_CACHE_DIR: # use the cache cmdclass['build_ext'] = CachingBuildExt cmdclass['cython'] = CythonCommand else: suffix = '.c' cmdclass['build_src'] = DummyBuildSrc - cmdclass['build_ext'] = build_ext + cmdclass['build_ext'] = build_ext lib_depends = ['reduce', 'inference', 'properties'] + def srcpath(name=None, suffix='.pyx', subdir='src'): - return pjoin('pandas', subdir, name+suffix) + return pjoin('pandas', subdir, name + suffix) if suffix == '.pyx': lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] @@ -563,8 +578,9 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): common_include = [np.get_include(), 'pandas/src/klib', 'pandas/src'] + def pxd(name): - return os.path.abspath(pjoin('pandas', name+'.pxd')) + return os.path.abspath(pjoin('pandas', name + '.pxd')) lib_depends = lib_depends + ['pandas/src/numpy_helper.h', @@ -681,23 +697,23 @@ def pxd(name): 'pandas.io.tests', 'pandas.stats.tests', ], - package_data={'pandas.io' : ['tests/*.h5', - 'tests/*.csv', - 'tests/*.txt', - 'tests/*.xls', - 'tests/*.xlsx', - 'tests/*.table'], + package_data={'pandas.io': ['tests/*.h5', + 'tests/*.csv', + 'tests/*.txt', + 'tests/*.xls', + 'tests/*.xlsx', + 'tests/*.table'], 'pandas.tools': ['tests/*.csv'], - 'pandas.tests' : ['data/*.pickle', - 'data/*.csv'], - 'pandas.tseries.tests' : ['data/*.pickle', - 'data/*.csv'] - }, + 'pandas.tests': ['data/*.pickle', + 'data/*.csv'], + 'pandas.tseries.tests': ['data/*.pickle', + 'data/*.csv'] + }, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION, license=LICENSE, - cmdclass = cmdclass, + cmdclass=cmdclass, url=URL, download_url=DOWNLOAD_URL, long_description=LONG_DESCRIPTION, diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 61bf7aa070e1f..cd14b3b5f383b 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -55,9 +55,10 @@ def f(): df = DataFrame(randn(1000, 1000)) """ -groupby_frame_cython_many_columns = Benchmark('df.groupby(labels).sum()', setup, - start_date=datetime(2011, 8, 1), - logy=True) +groupby_frame_cython_many_columns = Benchmark( + 'df.groupby(labels).sum()', setup, + start_date=datetime(2011, 8, 1), + logy=True) #---------------------------------------------------------------------- # single key, long, integer key @@ -183,7 +184,7 @@ def f(): start_date=datetime(2012, 5, 1)) groupby_last = Benchmark('data.groupby(labels).last()', setup, - start_date=datetime(2012, 5, 1)) + start_date=datetime(2012, 5, 1)) #---------------------------------------------------------------------- diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py index 23303f335af7e..8f66cc04a5ec9 100644 --- a/vb_suite/hdfstore_bench.py +++ b/vb_suite/hdfstore_bench.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -start_date = datetime(2012,7,1) +start_date = datetime(2012, 7, 1) common_setup = """from pandas_vb_common import * import os @@ -28,7 +28,7 @@ def remove(f): store.put('df1',df) """ -read_store = Benchmark("store.get('df1')", setup1, cleanup = "store.close()", +read_store = Benchmark("store.get('df1')", setup1, cleanup="store.close()", start_date=start_date) @@ -44,8 +44,9 @@ def remove(f): store = HDFStore(f) """ -write_store = Benchmark("store.put('df2',df)", setup2, cleanup = 
"store.close()", - start_date=start_date) +write_store = Benchmark( + "store.put('df2',df)", setup2, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # get from a store (mixed) @@ -63,8 +64,9 @@ def remove(f): store.put('df3',df) """ -read_store_mixed = Benchmark("store.get('df3')", setup3, cleanup = "store.close()", - start_date=start_date) +read_store_mixed = Benchmark( + "store.get('df3')", setup3, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -82,8 +84,9 @@ def remove(f): store = HDFStore(f) """ -write_store_mixed = Benchmark("store.put('df4',df)", setup4, cleanup = "store.close()", - start_date=start_date) +write_store_mixed = Benchmark( + "store.put('df4',df)", setup4, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # get from a table (mixed) @@ -102,8 +105,9 @@ def remove(f): store.append('df5',df) """ -read_store_table_mixed = Benchmark("store.select('df5')", setup5, cleanup = "store.close()", - start_date=start_date) +read_store_table_mixed = Benchmark( + "store.select('df5')", setup5, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -121,8 +125,9 @@ def remove(f): store = HDFStore(f) """ -write_store_table_mixed = Benchmark("store.append('df6',df)", setup6, cleanup = "store.close()", - start_date=start_date) +write_store_table_mixed = Benchmark( + "store.append('df6',df)", setup6, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # select from a table @@ -138,8 +143,9 @@ def remove(f): store.append('df7',df) """ -read_store_table = Benchmark("store.select('df7')", setup7, cleanup = "store.close()", - start_date=start_date) +read_store_table = Benchmark( + "store.select('df7')", setup7, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -154,8 +160,9 @@ def remove(f): store = HDFStore(f) """ -write_store_table = Benchmark("store.append('df8',df)", setup8, cleanup = "store.close()", - start_date=start_date) +write_store_table = Benchmark( + "store.append('df8',df)", setup8, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # get from a table (wide) @@ -168,8 +175,9 @@ def remove(f): store.append('df9',df) """ -read_store_table_wide = Benchmark("store.select('df9')", setup9, cleanup = "store.close()", - start_date=start_date) +read_store_table_wide = Benchmark( + "store.select('df9')", setup9, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -182,8 +190,9 @@ def remove(f): store = HDFStore(f) """ -write_store_table_wide = Benchmark("store.append('df10',df)", setup10, cleanup = "store.close()", - start_date=start_date) +write_store_table_wide = Benchmark( + "store.append('df10',df)", setup10, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # get from a table (wide) (indexed) @@ -198,8 +207,9 @@ def remove(f): store.create_table_index('df11') """ -query_store_table_wide = Benchmark("store.select('df11', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup11, cleanup = "store.close()", - 
start_date=start_date) +query_store_table_wide = Benchmark( + "store.select('df11', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup11, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -217,8 +227,9 @@ def remove(f): store.create_table_index('df12') """ -query_store_table = Benchmark("store.select('df12', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup12, cleanup = "store.close()", - start_date=start_date) +query_store_table = Benchmark( + "store.select('df12', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup12, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- # select from a panel table @@ -232,8 +243,9 @@ def remove(f): store.append('p1',p) """ -read_store_table_panel = Benchmark("store.select('p1')", setup13, cleanup = "store.close()", - start_date=start_date) +read_store_table_panel = Benchmark( + "store.select('p1')", setup13, cleanup="store.close()", + start_date=start_date) #---------------------------------------------------------------------- @@ -247,6 +259,6 @@ def remove(f): store = HDFStore(f) """ -write_store_table_panel = Benchmark("store.append('p2',p)", setup14, cleanup = "store.close()", - start_date=start_date) - +write_store_table_panel = Benchmark( + "store.append('p2',p)", setup14, cleanup="store.close()", + start_date=start_date) diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index c0f9c5a59e182..0c4898089a97f 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -42,7 +42,7 @@ """ statement = "df[col][idx]" bm_df_getitem = Benchmark(statement, setup, - name='dataframe_getitem_scalar') + name='dataframe_getitem_scalar') setup = common_setup + """ try: @@ -59,7 +59,7 @@ """ statement = "df[col][idx]" bm_df_getitem2 = Benchmark(statement, setup, - name='datamatrix_getitem_scalar') + name='datamatrix_getitem_scalar') setup = common_setup + """ try: @@ -104,9 +104,9 @@ midx = midx.take(np.random.permutation(np.arange(100000))) """ sort_level_zero = Benchmark("midx.sortlevel(0)", setup, - start_date=datetime(2012,1,1)) + start_date=datetime(2012, 1, 1)) sort_level_one = Benchmark("midx.sortlevel(1)", setup, - start_date=datetime(2012,1,1)) + start_date=datetime(2012, 1, 1)) #---------------------------------------------------------------------- # Panel subset selection diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py index d031e78b05ece..0b158e4173a5c 100644 --- a/vb_suite/join_merge.py +++ b/vb_suite/join_merge.py @@ -86,10 +86,8 @@ # DataFrame joins on index - #---------------------------------------------------------------------- # Merges - setup = common_setup + """ N = 10000 diff --git a/vb_suite/make.py b/vb_suite/make.py index b17a19030d971..5a8a8215db9a4 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -25,11 +25,13 @@ SPHINX_BUILD = 'sphinxbuild' + def upload(): 'push a copy to the site' os.system('cd build/html; rsync -avz . 
pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/vbench/ -essh') + def clean(): if os.path.exists('build'): shutil.rmtree('build') @@ -37,12 +39,14 @@ def clean(): if os.path.exists('source/generated'): shutil.rmtree('source/generated') + def html(): check_build() if os.system('sphinx-build -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") + def check_build(): build_dirs = [ 'build', 'build/doctrees', 'build/html', @@ -54,10 +58,12 @@ def check_build(): except OSError: pass + def all(): clean() html() + def auto_update(): msg = '' try: @@ -69,6 +75,7 @@ def auto_update(): msg += str(inst) + '\n' sendmail(msg) + def sendmail(err_msg=None): from_name, to_name = _get_config() @@ -98,6 +105,7 @@ def sendmail(err_msg=None): finally: server.close() + def _get_dir(subdir=None): import getpass USERNAME = getpass.getuser() @@ -111,6 +119,7 @@ def _get_dir(subdir=None): conf_dir = '%s%s' % (HOME, subdir) return conf_dir + def _get_credentials(): tmp_dir = _get_dir() cred = '%s/credentials' % tmp_dir @@ -125,6 +134,7 @@ def _get_credentials(): return server, port, login, pwd + def _get_config(): tmp_dir = _get_dir() with open('%s/addresses' % tmp_dir, 'r') as fh: @@ -132,26 +142,26 @@ def _get_config(): return from_name, to_name funcd = { - 'html' : html, - 'clean' : clean, - 'upload' : upload, - 'auto_update' : auto_update, - 'all' : all, - } + 'html': html, + 'clean': clean, + 'upload': upload, + 'auto_update': auto_update, + 'all': all, +} small_docs = False # current_dir = os.getcwd() # os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) -if len(sys.argv)>1: +if len(sys.argv) > 1: for arg in sys.argv[1:]: func = funcd.get(arg) if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s'%( - arg, funcd.keys())) + raise SystemExit('Do not know how to handle %s; valid args are %s' % ( + arg, funcd.keys())) func() else: small_docs = False all() -#os.chdir(current_dir) +# os.chdir(current_dir) diff --git a/vb_suite/measure_memory_consumption.py b/vb_suite/measure_memory_consumption.py index cdc2fe0d4b1f1..bb73cf5da4302 100755 --- a/vb_suite/measure_memory_consumption.py +++ b/vb_suite/measure_memory_consumption.py @@ -8,6 +8,7 @@ long summary """ + def main(): import shutil import tempfile @@ -21,27 +22,28 @@ def main(): from memory_profiler import memory_usage - warnings.filterwarnings('ignore',category=FutureWarning) + warnings.filterwarnings('ignore', category=FutureWarning) try: - TMP_DIR = tempfile.mkdtemp() - runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, - TMP_DIR, PREPARE, always_clean=True, - # run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) + TMP_DIR = tempfile.mkdtemp() + runner = BenchmarkRunner( + benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, + TMP_DIR, PREPARE, always_clean=True, + # run_option='eod', start_date=START_DATE, + module_dependencies=dependencies) results = {} for b in runner.benchmarks: - k=b.name + k = b.name try: - vs=memory_usage((b.run,)) + vs = memory_usage((b.run,)) v = max(vs) - #print(k, v) - results[k]=v + # print(k, v) + results[k] = v except Exception as e: print("Exception caught in %s\n" % k) print(str(e)) - s=Series(results) + s = Series(results) s.sort() print((s)) @@ -49,6 +51,5 @@ def main(): shutil.rmtree(TMP_DIR) - if __name__ == "__main__": - main() + main() diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index 4f5f09f0df592..aa647866925b7 100755 --- a/vb_suite/perf_HEAD.py +++ 
b/vb_suite/perf_HEAD.py @@ -10,19 +10,21 @@ import urllib2 import json -import pandas as pd +import pandas as pd WEB_TIMEOUT = 10 + def get_travis_data(): """figure out what worker we're running on, and the number of jobs it's running """ import os - jobid=os.environ.get("TRAVIS_JOB_ID") + jobid = os.environ.get("TRAVIS_JOB_ID") if not jobid: return None, None - workers=json.loads(urllib2.urlopen("https://api.travis-ci.org/workers/").read()) + workers = json.loads( + urllib2.urlopen("https://api.travis-ci.org/workers/").read()) host = njobs = None for item in workers: @@ -31,10 +33,12 @@ def get_travis_data(): if id and str(id) == str(jobid): break if host: - njobs = len([x for x in workers if host in x['host'] and x['payload']]) + njobs = len( + [x for x in workers if host in x['host'] and x['payload']]) return host, njobs + def get_utcdatetime(): try: from datetime import datetime @@ -42,26 +46,28 @@ def get_utcdatetime(): except: pass -def dump_as_gist(data,desc="The Commit",njobs=None): + +def dump_as_gist(data, desc="The Commit", njobs=None): host, njobs2 = get_travis_data()[:2] - if njobs: # be slightly more reliable - njobs= max(njobs,njobs2) + if njobs: # be slightly more reliable + njobs = max(njobs, njobs2) - content=dict(version="0.1.1", - timings=data, - datetime=get_utcdatetime() , # added in 0.1.1 - hostname=host , # added in 0.1.1 - njobs=njobs # added in 0.1.1, a measure of load on the travis box - ) + content = dict(version="0.1.1", + timings=data, + datetime=get_utcdatetime(), # added in 0.1.1 + hostname=host, # added in 0.1.1 + njobs=njobs # added in 0.1.1, a measure of load on the travis box + ) - payload=dict(description=desc, - public=True, - files={'results.json': dict(content=json.dumps(content))}) + payload = dict(description=desc, + public=True, + files={'results.json': dict(content=json.dumps(content))}) try: - r=urllib2.urlopen("https://api.github.com/gists", json.dumps(payload),timeout=WEB_TIMEOUT) - if 200 <=r.getcode() <300: - print("\n\n"+ "-"*80) + r = urllib2.urlopen("https://api.github.com/gists", + json.dumps(payload), timeout=WEB_TIMEOUT) + if 200 <= r.getcode() < 300: + print("\n\n" + "-" * 80) gist = json.loads(r.read()) file_raw_url = gist['files'].items()[0][1]['raw_url'] @@ -69,48 +75,49 @@ def dump_as_gist(data,desc="The Commit",njobs=None): print("[vbench-html-url] %s" % gist['html_url']) print("[vbench-api-url] %s" % gist['url']) - print("-"*80 +"\n\n") + print("-" * 80 + "\n\n") else: print("api.github.com returned status %d" % r.getcode()) except: print("Error occured while dumping to gist") + def main(): import warnings from suite import benchmarks - exit_code=0 - warnings.filterwarnings('ignore',category=FutureWarning) + exit_code = 0 + warnings.filterwarnings('ignore', category=FutureWarning) host, njobs = get_travis_data()[:2] - results=[] + results = [] for b in benchmarks: try: - d=b.run() + d = b.run() d.update(dict(name=b.name)) results.append(d) - msg="{name:<40}: {timing:> 10.4f} [ms]" + msg = "{name:<40}: {timing:> 10.4f} [ms]" print(msg.format(name=results[-1]['name'], timing=results[-1]['timing'])) except Exception as e: - exit_code=1 + exit_code = 1 if (type(e) == KeyboardInterrupt or - 'KeyboardInterrupt' in str(d)) : + 'KeyboardInterrupt' in str(d)): raise KeyboardInterrupt() - msg="{name:<40}: ERROR:\n<-------" + msg = "{name:<40}: ERROR:\n<-------" print(msg.format(name=results[-1]['name'])) - if isinstance(d,dict): + if isinstance(d, dict): if d['succeeded']: print("\nException:\n%s\n" % str(e)) else: - for k,v in 
sorted(d.iteritems()): - print("{k}: {v}".format(k=k,v=v)) + for k, v in sorted(d.iteritems()): + print("{k}: {v}".format(k=k, v=v)) print("------->\n") - dump_as_gist(results,"testing",njobs=njobs) + dump_as_gist(results, "testing", njobs=njobs) return exit_code @@ -122,102 +129,111 @@ def main(): ##################################################### # functions for retrieving and processing the results + def get_vbench_log(build_url): - r=urllib2.urlopen(build_url) + r = urllib2.urlopen(build_url) if not (200 <= r.getcode() < 300): return - s=json.loads(r.read()) - s=[x for x in s['matrix'] if "VBENCH" in ((x.get('config',{}) or {}).get('env',{}) or {})] - #s=[x for x in s['matrix']] + s = json.loads(r.read()) + s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {}) + or {}).get('env', {}) or {})] + # s=[x for x in s['matrix']] if not s: return - id=s[0]['id'] # should be just one for now - r2=urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id) + id = s[0]['id'] # should be just one for now + r2 = urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id) if (not 200 <= r.getcode() < 300): return - s2=json.loads(r2.read()) + s2 = json.loads(r2.read()) return s2.get('log') + def get_results_raw_url(build): "Taks a Travis a build number, retrieves the build log and extracts the gist url" import re - log=get_vbench_log("https://api.travis-ci.org/builds/%s" % build) + log = get_vbench_log("https://api.travis-ci.org/builds/%s" % build) if not log: return - l=[x.strip() for x in log.split("\n") if re.match(".vbench-gist-raw_url",x)] + l = [x.strip( + ) for x in log.split("\n") if re.match(".vbench-gist-raw_url", x)] if l: - s=l[0] - m = re.search("(https://[^\s]+)",s) + s = l[0] + m = re.search("(https://[^\s]+)", s) if m: return m.group(0) + def convert_json_to_df(results_url): """retrieve json results file from url and return df df contains timings for all successful vbenchmarks """ - res=json.loads(urllib2.urlopen(results_url).read()) - timings=res.get("timings") + res = json.loads(urllib2.urlopen(results_url).read()) + timings = res.get("timings") if not timings: return - res=[x for x in timings if x.get('succeeded')] + res = [x for x in timings if x.get('succeeded')] df = pd.DataFrame(res) df = df.set_index("name") return df + def get_build_results(build): "Returns a df with the results of the VBENCH job associated with the travis build" - r_url=get_results_raw_url(build) + r_url = get_results_raw_url(build) if not r_url: return return convert_json_to_df(r_url) -def get_all_results(repo_id=53976): # travis pydata/pandas id - """Fetches the VBENCH results for all travis builds, and returns a list of result df - - unsuccesful individual vbenches are dropped. 
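[editor's sketch] The pagination loop in `get_all_results` boils down to the cursor pattern sketched below: request a page of builds, record the results, then use the smallest build number on the page as the `after_number` cursor for the next request, stopping on an empty page. The `fetch_build_pages` helper is illustrative only (not part of this patch), and the URL shape follows the historical Travis v1 API this file targets:

import json
import urllib2  # Python 2, matching perf_HEAD.py

def fetch_build_pages(repo_id=53976):
    # Illustrative sketch of the cursor-based paging used by
    # get_all_results; 53976 is the travis pydata/pandas repo id.
    base = ('https://api.travis-ci.org/builds'
            '?url=%2Fbuilds&repository_id={repo_id}'.format(repo_id=repo_id))
    url = base
    pages = []
    while True:
        r = urllib2.urlopen(url)
        if not (200 <= r.getcode() < 300):
            break
        builds = json.loads(r.read())
        if not builds:
            break
        pages.append(builds)
        # the oldest build number on this page becomes the next cursor
        after = min(b['number'] for b in builds)
        url = base + '&after_number={after}'.format(after=after)
    return pages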
- """ - from collections import OrderedDict - def get_results_from_builds(builds): - dfs=OrderedDict() - for build in builds: - build_id = build['id'] - build_number = build['number'] - print(build_number) - res = get_build_results(build_id) - if res is not None: - dfs[build_number]=res - return dfs - - base_url='https://api.travis-ci.org/builds?url=%2Fbuilds&repository_id={repo_id}' - url=base_url.format(repo_id=repo_id) - url_after=url+'&after_number={after}' - dfs=OrderedDict() - - while True: - r=urllib2.urlopen(url) - if not (200 <= r.getcode() < 300): - break - builds=json.loads(r.read()) - res = get_results_from_builds(builds) - if not res: - break - last_build_number= min(res.keys()) - dfs.update(res) - url=url_after.format(after=last_build_number) - - return dfs + +def get_all_results(repo_id=53976): # travis pydata/pandas id + """Fetches the VBENCH results for all travis builds, and returns a list of result df + + unsuccesful individual vbenches are dropped. + """ + from collections import OrderedDict + + def get_results_from_builds(builds): + dfs = OrderedDict() + for build in builds: + build_id = build['id'] + build_number = build['number'] + print(build_number) + res = get_build_results(build_id) + if res is not None: + dfs[build_number] = res + return dfs + + base_url = 'https://api.travis-ci.org/builds?url=%2Fbuilds&repository_id={repo_id}' + url = base_url.format(repo_id=repo_id) + url_after = url + '&after_number={after}' + dfs = OrderedDict() + + while True: + r = urllib2.urlopen(url) + if not (200 <= r.getcode() < 300): + break + builds = json.loads(r.read()) + res = get_results_from_builds(builds) + if not res: + break + last_build_number = min(res.keys()) + dfs.update(res) + url = url_after.format(after=last_build_number) + + return dfs + def get_all_results_joined(repo_id=53976): - def mk_unique(df): - for dupe in df.index.get_duplicates(): - df=df.ix[df.index != dupe] - return df - dfs = get_all_results(repo_id) - for k in dfs: - dfs[k]=mk_unique(dfs[k]) - ss=[pd.Series(v.timing,name=k) for k,v in dfs.iteritems()] - results = pd.concat(reversed(ss),1) - return results + def mk_unique(df): + for dupe in df.index.get_duplicates(): + df = df.ix[df.index != dupe] + return df + dfs = get_all_results(repo_id) + for k in dfs: + dfs[k] = mk_unique(dfs[k]) + ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()] + results = pd.concat(reversed(ss), 1) + return results diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py index 2523462eb4e4b..2f675636ee928 100644 --- a/vb_suite/reindex.py +++ b/vb_suite/reindex.py @@ -136,7 +136,7 @@ def backfill(): statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)" frame_drop_dup_inplace = Benchmark(statement, setup, - start_date=datetime(2012, 5, 16)) + start_date=datetime(2012, 5, 16)) lib_fast_zip = Benchmark('lib.fast_zip(col_array_list)', setup, name='lib_fast_zip', @@ -154,7 +154,7 @@ def backfill(): statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)" frame_drop_dup_na_inplace = Benchmark(statement2, setup, - start_date=datetime(2012, 5, 16)) + start_date=datetime(2012, 5, 16)) setup = common_setup + """ s = Series(np.random.randint(0, 1000, size=10000)) diff --git a/vb_suite/run_suite.py b/vb_suite/run_suite.py index 0c03d17607f4e..43bf24faae43a 100755 --- a/vb_suite/run_suite.py +++ b/vb_suite/run_suite.py @@ -2,6 +2,7 @@ from vbench.api import BenchmarkRunner from suite import * + def run_process(): runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, BUILD, DB_PATH, TMP_DIR, PREPARE, diff 
--git a/vb_suite/source/conf.py b/vb_suite/source/conf.py index 35a89ba8e1de6..d83448fd97d09 100644 --- a/vb_suite/source/conf.py +++ b/vb_suite/source/conf.py @@ -10,12 +10,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys +import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.append(os.path.abspath('.')) +# sys.path.append(os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../sphinxext')) sys.path.extend([ @@ -27,7 +28,7 @@ ]) -# -- General configuration ----------------------------------------------------- +# -- General configuration ----------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sphinxext. @@ -42,7 +43,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' @@ -69,43 +70,43 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] -# -- Options for HTML output --------------------------------------------------- +# -- Options for HTML output --------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. @@ -114,12 +115,12 @@ # The style sheet to use for HTML and HTML Help pages. A file of that name # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. -#html_style = 'statsmodels.css' +# html_style = 'statsmodels.css' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. 
html_theme_path = ['themes'] @@ -129,16 +130,16 @@ html_title = 'Vbench performance benchmarks for pandas' # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -147,75 +148,75 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'performance' -# -- Options for LaTeX output -------------------------------------------------- +# -- Options for LaTeX output -------------------------------------------- # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'performance.tex', - u'pandas vbench Performance Benchmarks', - u'Wes McKinney', 'manual'), + ('index', 'performance.tex', + u'pandas vbench Performance Benchmarks', + u'Wes McKinney', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. 
diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py
index 24f70d284956b..bfee959ab982f 100644
--- a/vb_suite/sparse.py
+++ b/vb_suite/sparse.py
@@ -26,7 +26,7 @@
 stmt = "SparseDataFrame(series)"
 
 bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame",
-                      start_date=datetime(2011, 6, 1))
+                       start_date=datetime(2011, 6, 1))
 
 setup = common_setup + """
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
index e44eda802dc71..380ca5e5fb3b6 100644
--- a/vb_suite/suite.py
+++ b/vb_suite/suite.py
@@ -57,7 +57,7 @@
     DB_PATH = config.get('setup', 'db_path')
     TMP_DIR = config.get('setup', 'tmp_dir')
 except:
-    REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__),"../"))
+    REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
     REPO_URL = 'git@github.com:pydata/pandas.git'
     DB_PATH = os.path.join(REPO_PATH, 'vb_suite/benchmarks.db')
     TMP_DIR = os.path.join(HOME, 'tmp/vb_pandas')
@@ -78,7 +78,8 @@
 
 # HACK!
-#timespan = [datetime(2011, 1, 1), datetime(2012, 1, 1)]
+# timespan = [datetime(2011, 1, 1), datetime(2012, 1, 1)]
+
 
 def generate_rst_files(benchmarks):
     import matplotlib as mpl
diff --git a/vb_suite/test.py b/vb_suite/test.py
index b565ea37fde9f..da30c3e1a5f76 100644
--- a/vb_suite/test.py
+++ b/vb_suite/test.py
@@ -22,8 +22,10 @@
 bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f'
 bmk = '96bda4b9a60e17acf92a243580f2a0c3'
 
+
 def get_results(bmk):
-    results = con.execute("select * from results where checksum='%s'" % bmk).fetchall()
+    results = con.execute(
+        "select * from results where checksum='%s'" % bmk).fetchall()
     x = Series(dict((t[1], t[3]) for t in results))
     x.index = x.index.map(repo.timestamps.get)
     x = x.sort_index()
@@ -31,6 +33,7 @@ def get_results(bmk):
 
 x = get_results(bmk)
 
+
 def graph1():
     dm_getitem = get_results('459225186023853494bc345fd180f395')
     dm_getvalue = get_results('c22ca82e0cfba8dc42595103113c7da3')
@@ -44,6 +47,7 @@ def graph1():
     plt.ylabel('ms')
     plt.legend(loc='best')
 
+
 def graph2():
     bm = get_results('96bda4b9a60e17acf92a243580f2a0c3')
     plt.figure()
@@ -61,4 +65,3 @@ def graph2():
     plt.xlim([bm.dropna().index[0] - datetools.MonthEnd(),
               bm.dropna().index[-1] + datetools.MonthEnd()])
     plt.ylabel('ms')
-
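The get_results hunk above builds its SQL by string interpolation, and the reflow keeps that as-is. For reference, here is a minimal self-contained sketch of the same lookup with a parameterized query. The layout of the vbench results table is an assumption read off the tuple indexing in the diff (t[1] is the revision sha, t[3] the timing), and timestamps stands in for the repo.timestamps mapping used above:

import sqlite3
from pandas import Series

def get_results(con, timestamps, bmk):
    # '?' placeholder instead of interpolating bmk into the SQL string
    rows = con.execute(
        "select * from results where checksum = ?", (bmk,)).fetchall()
    x = Series(dict((t[1], t[3]) for t in rows))  # sha -> timing (ms)
    x.index = x.index.map(timestamps.get)         # sha -> commit timestamp
    return x.sort_index()

# e.g. con = sqlite3.connect('vb_suite/benchmarks.db'), the path used in suite.py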
diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py
index 207a4eb8a4db5..c993e62de450b 100755
--- a/vb_suite/test_perf.py
+++ b/vb_suite/test_perf.py
@@ -33,17 +33,17 @@
 import time
 
 DEFAULT_MIN_DURATION = 0.01
-BASELINE_COMMIT = '2149c50' # 0.9.1 + regression fix + vb fixes # TODO: detect upstream/master
+BASELINE_COMMIT = '2149c50'  # 0.9.1 + regression fix + vb fixes # TODO: detect upstream/master
 
 parser = argparse.ArgumentParser(description='Use vbench to generate a report comparing performance between two commits.')
 parser.add_argument('-a', '--auto',
                     help='Execute a run using the defaults for the base and target commits.',
                     action='store_true',
                     default=False)
-parser.add_argument('-b','--base-commit',
+parser.add_argument('-b', '--base-commit',
                     help='The commit serving as performance baseline (default: %s).'
                     % BASELINE_COMMIT,
                     type=str)
-parser.add_argument('-t','--target-commit',
+parser.add_argument('-t', '--target-commit',
                     help='The commit to compare against the baseline (default: HEAD).',
                     type=str)
 parser.add_argument('-m', '--min-duration',
@@ -55,7 +55,8 @@
                     dest='log_file',
                     help='path of file in which to save the report (default: vb_suite.log).')
 
-def get_results_df(db,rev):
+
+def get_results_df(db, rev):
     from pandas import DataFrame
     """Takes a git commit hash and returns a Dataframe of benchmark results
     """
@@ -68,8 +69,10 @@ def get_results_df(db,rev):
     results = results.join(bench['name'], on='checksum').set_index("checksum")
     return results
 
+
 def prprint(s):
-    print("*** %s"%s)
+    print("*** %s" % s)
+
 
 def main():
     from pandas import DataFrame
@@ -86,77 +89,85 @@ def main():
         args.target_commit = args.target_commit[:7]
 
     if not args.log_file:
-        args.log_file = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log'))
+        args.log_file = os.path.abspath(
+            os.path.join(REPO_PATH, 'vb_suite.log'))
 
-    TMP_DIR  = tempfile.mkdtemp()
+    TMP_DIR = tempfile.mkdtemp()
     prprint("TMP_DIR = %s" % TMP_DIR)
     prprint("LOG_FILE = %s\n" % args.log_file)
 
     try:
         logfile = open(args.log_file, 'w')
 
-        prprint( "Opening DB at '%s'...\n" % DB_PATH)
+        prprint("Opening DB at '%s'...\n" % DB_PATH)
         db = BenchmarkDB(DB_PATH)
 
         prprint("Initializing Runner...")
-        runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
-                                 TMP_DIR, PREPARE, always_clean=True,
-                                 # run_option='eod', start_date=START_DATE,
-                                 module_dependencies=dependencies)
+        runner = BenchmarkRunner(
+            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
+            TMP_DIR, PREPARE, always_clean=True,
+            # run_option='eod', start_date=START_DATE,
+            module_dependencies=dependencies)
 
-        repo = runner.repo #(steal the parsed git repo used by runner)
+        repo = runner.repo  # (steal the parsed git repo used by runner)
 
         # ARGH. reparse the repo, without discarding any commits,
         # then overwrite the previous parse results
-        #prprint ("Slaughtering kittens..." )
+        # prprint ("Slaughtering kittens..." )
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)
 
         h_head = args.target_commit or repo.shas[-1]
         h_baseline = args.base_commit
 
-        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,"")))
-        prprint('Baseline [%s] : %s\n' % (h_baseline,repo.messages.get(h_baseline,"")))
+        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
+        prprint('Baseline [%s] : %s\n' % (h_baseline,
+                repo.messages.get(h_baseline, "")))
 
-        prprint ("removing any previous measurements for the commits." )
+        prprint("removing any previous measurements for the commits.")
         db.delete_rev_results(h_baseline)
         db.delete_rev_results(h_head)
 
         # TODO: we could skip this, but we need to make sure all
         # results are in the DB, which is a little tricky with
         # start dates and so on.
-        prprint( "Running benchmarks for baseline [%s]" % h_baseline)
+        prprint("Running benchmarks for baseline [%s]" % h_baseline)
         runner._run_and_write_results(h_baseline)
 
-        prprint ("Running benchmarks for target [%s]" % h_head)
+        prprint("Running benchmarks for target [%s]" % h_head)
         runner._run_and_write_results(h_head)
 
-        prprint( 'Processing results...')
+        prprint('Processing results...')
 
-        head_res = get_results_df(db,h_head)
-        baseline_res = get_results_df(db,h_baseline)
-        ratio = head_res['timing']/baseline_res['timing']
+        head_res = get_results_df(db, h_head)
+        baseline_res = get_results_df(db, h_baseline)
+        ratio = head_res['timing'] / baseline_res['timing']
         totals = DataFrame(dict(t_head=head_res['timing'],
                                 t_baseline=baseline_res['timing'],
                                 ratio=ratio,
-                                name=baseline_res.name),columns=["t_head","t_baseline","ratio","name"])
-        totals = totals.ix[totals.t_head > args.min_duration] # ignore below threshold
-        totals = totals.dropna().sort("ratio").set_index('name') # sort in ascending order
+                                name=baseline_res.name), columns=["t_head", "t_baseline", "ratio", "name"])
+        totals = totals.ix[totals.t_head > args.min_duration]
+        # ignore below threshold
+        totals = totals.dropna(
+        ).sort("ratio").set_index('name')  # sort in ascending order
 
         s = "\n\nResults:\n"
-        s += totals.to_string(float_format=lambda x: "{:4.4f}".format(x).rjust(10))
+        s += totals.to_string(
+            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
         s += "\n\n"
         s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
         s += "- a Ratio of 1.30 means the target commit is 30% slower then the baseline.\n\n"
-        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,""))
-        s += 'Baseline [%s] : %s\n\n' % (h_baseline,repo.messages.get(h_baseline,""))
+        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
+        s += 'Baseline [%s] : %s\n\n' % (
+            h_baseline, repo.messages.get(h_baseline, ""))
 
         logfile.write(s)
         logfile.close()
 
-        prprint(s )
-        prprint("Results were also written to the logfile at '%s'\n" % args.log_file)
+        prprint(s)
+        prprint("Results were also written to the logfile at '%s'\n" %
                args.log_file)
 
     finally:
         # print("Disposing of TMP_DIR: %s" % TMP_DIR)
@@ -172,7 +183,7 @@ def _parse_commit_log(repo_path):
     from pandas import Series
     git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
     githist = git_cmd + ('log --graph --pretty=format:'
-                        '\"::%h::%cd::%s::%an\" > githist.txt')
+                         '\"::%h::%cd::%s::%an\" > githist.txt')
     os.system(githist)
     githist = open('githist.txt').read()
     os.remove('githist.txt')
@@ -182,7 +193,7 @@ def _parse_commit_log(repo_path):
     messages = []
     authors = []
     for line in githist.split('\n'):
-        if '*' not in line.split("::")[0]: # skip non-commit lines
+        if '*' not in line.split("::")[0]:  # skip non-commit lines
             continue
 
         _, sha, stamp, message, author = line.split('::', 4)
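The reporting logic in main() above boils down to aligning two timing columns on benchmark checksum and taking their ratio; as the report text says, a ratio of 1.30 means the target commit is 30% slower than the baseline. A hedged sketch of that step in later pandas idiom (boolean indexing and sort_values replace the deprecated .ix and .sort calls in the diff; names stands in for the joined 'name' column):

from pandas import DataFrame

def compare(head_res, baseline_res, names, min_duration=0.01):
    # head_res and baseline_res are frames indexed by checksum with a
    # 'timing' column, as produced by get_results_df above.
    totals = DataFrame({'t_head': head_res['timing'],
                        't_baseline': baseline_res['timing'],
                        'ratio': head_res['timing'] / baseline_res['timing'],
                        'name': names})
    totals = totals[totals.t_head > min_duration]  # ignore below threshold
    return totals.dropna().sort_values('ratio').set_index('name')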
diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py
index 1727db1285cd1..202aa5e4d5e61 100644
--- a/vb_suite/timeseries.py
+++ b/vb_suite/timeseries.py
@@ -42,13 +42,15 @@ def date_range(start=None, end=None, periods=None, freq=None):
 """
 
-timeseries_1min_5min_ohlc = Benchmark("ts[:10000].resample('5min', how='ohlc')",
-                                      common_setup,
-                                      start_date=datetime(2012, 5, 1))
+timeseries_1min_5min_ohlc = Benchmark(
+    "ts[:10000].resample('5min', how='ohlc')",
+    common_setup,
+    start_date=datetime(2012, 5, 1))
 
-timeseries_1min_5min_mean = Benchmark("ts[:10000].resample('5min', how='mean')",
-                                      common_setup,
-                                      start_date=datetime(2012, 5, 1))
+timeseries_1min_5min_mean = Benchmark(
+    "ts[:10000].resample('5min', how='mean')",
+    common_setup,
+    start_date=datetime(2012, 5, 1))
 
 #----------------------------------------------------------------------
 # Irregular alignment
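The hunk above only reflows two Benchmark declarations, but it shows the pattern used throughout vb_suite: a statement string to time, a setup string, and an optional start_date. A sketch of a new entry in the same style; the 15-minute rule is invented for illustration, and both the vbench.api import path and the contents of common_setup are assumptions, not taken from this diff:

from datetime import datetime
from vbench.api import Benchmark  # assumed import path

common_setup = """
from pandas import *
import numpy as np
"""  # assumed to match the suite's shared preamble

setup = common_setup + """
rng = date_range('1/1/2000', periods=100000, freq='T')
ts = Series(np.random.randn(len(rng)), index=rng)
"""

timeseries_1min_15min_mean = Benchmark(
    "ts[:10000].resample('15min', how='mean')",  # statement being timed
    setup,
    start_date=datetime(2012, 5, 1))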
@@ -92,7 +94,7 @@ def date_range(start=None, end=None, periods=None, freq=None):
 dates = date_range('1/1/1990', periods=N * 10, freq='5s')
 """
 timeseries_asof_single = Benchmark('ts.asof(dates[0])', setup,
-                                  start_date=datetime(2012, 4, 27))
+                                   start_date=datetime(2012, 4, 27))
 
 timeseries_asof = Benchmark('ts.asof(dates)', setup,
                             start_date=datetime(2012, 4, 27))
@@ -187,7 +189,7 @@ def date_range(start=None, end=None, periods=None, freq=None):
 """
 
 dti_append_tz = \
-    Benchmark('s1.append(slst)', setup, start_date=datetime(2012, 9 ,1))
+    Benchmark('s1.append(slst)', setup, start_date=datetime(2012, 9, 1))
 
 setup = common_setup + """
 rng = date_range('1/1/2000', periods=100000, freq='H')
 """
 
 dti_reset_index = \
-    Benchmark('df.reset_index()', setup, start_date=datetime(2012,9,1))
+    Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1))
 
 setup = common_setup + """
 rng = date_range('1/1/2000', periods=100000, freq='H')
 """
 
 dti_reset_index_tz = \
-    Benchmark('df.reset_index()', setup, start_date=datetime(2012,9,1))
+    Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1))
 
 setup = common_setup + """
 rng = date_range('1/1/2000', periods=10000, freq='T')