Closed
Description
from SO
idx = pd.IndexSlice
n=10000000
np.random.seed(1234)
mdt = pandas.DataFrame()
mdt['A'] = np.random.choice(range(10000,45000,1000), n)
mdt['B'] = np.random.choice(range(10,400), n)
mdt['C'] = np.random.choice(range(1,150), n)
mdt['D'] = np.random.choice(range(10000,45000), n)
mdt['x'] = np.random.choice(range(400), n)
mdt['y'] = np.random.choice(range(25), n)
test_A = 25000
test_B = 25
test_C = 40
test_D = 35000
eps_A = 5000
eps_B = 5
eps_C = 5
eps_D = 5000
mdt2 = mdt.set_index(['A','B','C','D']).sortlevel()
selection
In [106]: %timeit mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]
1 loops, best of 3: 4.34 s per loop
Repeated selection
In [105]: %timeit mdt2.loc[idx[test_A-eps_A:test_A+eps_A],:].loc[idx[:,test_B-eps_B:test_B+eps_B],:].loc[idx[:,:,test_C-eps_C:test_C+eps_C],:].loc[idx[:,:,:,test_D-eps_D:test_D+eps_D],:]
10 loops, best of 3: 140 ms per loop