|
25 | 25 |
|
26 | 26 | import pandas.util.testing as tm
|
27 | 27 | from pandas import date_range
|
| 28 | +from numpy.testing.decorators import slow |
28 | 29 |
|
29 | 30 | _verbose = False
|
30 | 31 |
|
@@ -1689,74 +1690,71 @@ def test_multiindex_perf_warn(self):
|
1689 | 1690 | with tm.assert_produces_warning(PerformanceWarning):
|
1690 | 1691 | _ = df.loc[(0,)]
|
1691 | 1692 |
|
| 1693 | + @slow |
def test_multiindex_get_loc(self):  # GH7724, GH2646
    """Check ``.loc`` on a MultiIndex-ed frame for full and partial keys.

    Random frames (one with a non-unique index, one with a unique index)
    are indexed by ``cols[:-1]`` after being sorted on 0..4 leading
    columns, so lookups are exercised both before and past the lexsort
    depth.  Every prefix of every key is validated against a boolean-mask
    selection on the flat frame.
    """
    # ``catch_warnings`` accepts no warning category; the filter has to be
    # installed inside the context, which restores the previous filters on
    # exit (no manual "restore" step needed).
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PerformanceWarning)

        # test indexing into a multi-index before & past the lexsort depth
        from numpy.random import randint, choice, randn
        cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

        def validate(mi, df, key):
            # ``mask`` accumulates the rows of ``df`` matching key[:i+1]
            mask = np.ones(len(df)).astype('bool')

            # test for all partials of this key
            for i, k in enumerate(key):
                mask &= df.iloc[:, i] == k

                if not mask.any():
                    # no row matches this prefix: it must be absent
                    self.assertNotIn(key[:i+1], mi.index)
                    continue

                self.assertIn(key[:i+1], mi.index)
                right = df[mask].copy()

                if i + 1 != len(key):  # partial key
                    # the matched levels are dropped from the result
                    right.drop(cols[:i+1], axis=1, inplace=True)
                    right.set_index(cols[i+1:-1], inplace=True)
                    assert_frame_equal(mi.loc[key[:i+1]], right)

                else:  # full key
                    right.set_index(cols[:-1], inplace=True)
                    if len(right) == 1:  # single hit
                        right = Series(right['jolia'].values,
                                       name=right.index[0],
                                       index=['jolia'])
                        assert_series_equal(mi.loc[key[:i+1]], right)
                    else:  # multi hit
                        assert_frame_equal(mi.loc[key[:i+1]], right)

        def loop(mi, df, keys):
            for key in keys:
                validate(mi, df, key)

        n, m = 1000, 50

        vals = [randint(0, 10, n), choice(list('abcdefghij'), n),
                choice(pd.date_range('20141009', periods=10).tolist(), n),
                choice(list('ZYXWVUTSRQ'), n), randn(n)]
        vals = list(map(tuple, zip(*vals)))

        # bunch of keys for testing; each level draws from one value more
        # than ``vals`` does, so some keys cannot be present
        keys = [randint(0, 11, m), choice(list('abcdefghijk'), m),
                choice(pd.date_range('20141009', periods=11).tolist(), m),
                choice(list('ZYXWVUTSRQP'), m)]
        keys = list(map(tuple, zip(*keys)))
        # plus keys taken from the data itself (guaranteed hits)
        keys += list(map(lambda t: t[:-1], vals[::n//m]))

        # covers both unique index and non-unique index
        df = pd.DataFrame(vals, columns=cols)
        a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

        for frame in a, b:
            for i in range(5):  # lexsort depth
                df = (frame.copy() if i == 0
                      else frame.sort_values(by=cols[:i]))
                mi = df.set_index(cols[:-1])
                assert not mi.index.lexsort_depth < i
                loop(mi, df, keys)
1760 | 1758 |
|
1761 | 1759 | def test_series_getitem_multiindex(self):
|
1762 | 1760 |
|
@@ -4653,13 +4651,15 @@ def test_indexing_dtypes_on_empty(self):
|
4653 | 4651 | assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0])
|
4654 | 4652 | assert_series_equal(df2.loc[:,'a'], df2.ix[:,0])
|
4655 | 4653 |
|
| 4654 | + @slow |
4656 | 4655 | def test_large_dataframe_indexing(self):
|
4657 | 4656 | #GH10692
|
4658 | 4657 | result = DataFrame({'x': range(10**6)},dtype='int64')
|
4659 | 4658 | result.loc[len(result)] = len(result) + 1
|
4660 | 4659 | expected = DataFrame({'x': range(10**6 + 1)},dtype='int64')
|
4661 | 4660 | assert_frame_equal(result, expected)
|
4662 | 4661 |
|
| 4662 | + @slow |
4663 | 4663 | def test_large_mi_dataframe_indexing(self):
|
4664 | 4664 | #GH10645
|
4665 | 4665 | result = MultiIndex.from_arrays([range(10**6), range(10**6)])
|
|
0 commit comments