diff --git a/pandas/tests/indexes/multi/__init__.py b/pandas/tests/indexes/multi/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py
new file mode 100644
index 0000000000000..6cf9003500b61
--- /dev/null
+++ b/pandas/tests/indexes/multi/conftest.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+from pandas import Index, MultiIndex
+
+
+@pytest.fixture
+def idx():
+    # a MultiIndex used to test the general
+    # functionality of this object
+    major_axis = Index(['foo', 'bar', 'baz', 'qux'])
+    minor_axis = Index(['one', 'two'])
+
+    major_labels = np.array([0, 0, 1, 2, 3, 3])
+    minor_labels = np.array([0, 1, 0, 1, 0, 1])
+    index_names = ['first', 'second']
+    index = MultiIndex(
+        levels=[major_axis, minor_axis],
+        labels=[major_labels, minor_labels],
+        names=index_names,
+        verify_integrity=False
+    )
+    return index
+
+
+@pytest.fixture
+def index_names():
+    # names that match those in the idx fixture for testing equality of
+    # names assigned to the idx
+    return ['first', 'second']
+
+
+@pytest.fixture
+def holder():
+    # the MultiIndex constructor used to test compatibility with pickle
+    return MultiIndex
+
+
+@pytest.fixture
+def compat_props():
+    # a MultiIndex must have these properties associated with it
+    return ['shape', 'ndim', 'size']
diff --git a/pandas/tests/indexes/data/mindex_073.pickle b/pandas/tests/indexes/multi/data/mindex_073.pickle
similarity index 100%
rename from pandas/tests/indexes/data/mindex_073.pickle
rename to pandas/tests/indexes/multi/data/mindex_073.pickle
diff --git a/pandas/tests/indexes/data/multiindex_v1.pickle b/pandas/tests/indexes/multi/data/multiindex_v1.pickle
similarity index 100%
rename from pandas/tests/indexes/data/multiindex_v1.pickle
rename to pandas/tests/indexes/multi/data/multiindex_v1.pickle
diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py
new file mode 100644
index 0000000000000..072356e4923a6
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_analytics.py
@@ -0,0 +1,8 @@
+import pytest
+
+
+def test_shift(idx):
+
+    # GH8083 test the base class for shift
+    pytest.raises(NotImplementedError, idx.shift, 1)
+    pytest.raises(NotImplementedError, idx.shift, 1, 2)
diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py
new file mode 100644
index 0000000000000..0dfe322c2eef9
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_compat.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+
+
+import numpy as np
+import pandas.util.testing as tm
+import pytest
+from pandas import MultiIndex
+from pandas.compat import PY3, long
+
+
+def test_numeric_compat(idx):
+    tm.assert_raises_regex(TypeError, "cannot perform __mul__",
+                           lambda: idx * 1)
+    tm.assert_raises_regex(TypeError, "cannot perform __rmul__",
+                           lambda: 1 * idx)
+
+    div_err = "cannot perform __truediv__" if PY3 \
+        else "cannot perform __div__"
+    tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1)
+    div_err = div_err.replace(' __', ' __r')
+    tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx)
+    tm.assert_raises_regex(TypeError, "cannot perform __floordiv__",
+                           lambda: idx // 1)
+    tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__",
+                           lambda: 1 // idx)
+
+
+def test_logical_compat(idx):
+    tm.assert_raises_regex(TypeError, 'cannot perform all',
+                           lambda: idx.all())
+    tm.assert_raises_regex(TypeError, 'cannot perform 
any', + lambda: idx.any()) + + +def test_boolean_context_compat(idx): + + with pytest.raises(ValueError): + bool(idx) + + +def test_boolean_context_compat2(): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) + common = i1.intersection(i2) + + with pytest.raises(ValueError): + bool(common) + + +def test_inplace_mutation_resets_values(): + levels = [['a', 'b', 'c'], [4]] + levels2 = [[1, 2, 3], ['a']] + labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, labels=labels) + mi2 = MultiIndex(levels=levels2, labels=labels) + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + assert mi1._tuples is not None + + # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should kill _tuples + mi1.set_levels(levels2, inplace=True) + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + new_values = mi2.set_labels(labels2).values + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc + mi2.set_labels(labels2, inplace=True) + tm.assert_almost_equal(mi2.values, new_values) + + +def test_ndarray_compat_properties(idx, compat_props): + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + for prop in compat_props: + assert getattr(idx, prop) == getattr(values, prop) + + # test for validity + idx.nbytes + idx.values.nbytes + + +def test_compat(indices): + assert indices.tolist() == list(indices) + + +def test_pickle_compat_construction(holder): + # this is testing for pickle compat + if holder is None: + return + + # need an object to create with + pytest.raises(TypeError, holder) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py new file mode 100644 index 0000000000000..9577662bda366 --- /dev/null +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -0,0 +1,434 @@ +# -*- coding: utf-8 -*- + +import re + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Index, MultiIndex, date_range +from pandas._libs.tslib import Timestamp +from pandas.compat import lrange, range +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +def test_constructor_single_level(): + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] + + +def test_constructor_no_levels(): + tm.assert_raises_regex(ValueError, "non-zero number " + "of levels/labels", + MultiIndex, levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') + with tm.assert_raises_regex(TypeError, both_re): + MultiIndex(levels=[]) + with 
tm.assert_raises_regex(TypeError, both_re): + MultiIndex(labels=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], [u'one', u'two']] + labels = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = ((['foo'], ['bar'])) + message = "MultiIndex.name must be a hashable type" + tm.assert_raises_regex(TypeError, message, + MultiIndex, levels=levels, + labels=labels, names=names) + + # With .rename() + mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=('foo', 'bar')) + renamed = [['foor'], ['barr']] + tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) + # With .set_names() + tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) + + +def test_constructor_mismatched_label_levels(idx): + labels = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + tm.assert_raises_regex(ValueError, "Length of levels and labels " + "must be the same", MultiIndex, + levels=levels, labels=labels) + length_error = re.compile('>= length of level') + label_error = re.compile(r'Unequal label lengths: \[4, 2\]') + + # important to check that it's looking at the right thing. + with tm.assert_raises_regex(ValueError, length_error): + MultiIndex(levels=[['a'], ['b']], + labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with tm.assert_raises_regex(ValueError, label_error): + MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) + + # external API + with tm.assert_raises_regex(ValueError, length_error): + idx.copy().set_levels([['a'], ['b']]) + + with tm.assert_raises_regex(ValueError, label_error): + idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + labels = np.array([1, 1, 2, 0, 0, 1, 1]) + val = labels[0] + mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + copy=True) + assert mi.labels[0][0] == val + labels[0] = 15 + assert mi.labels[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +def test_from_arrays(idx): + arrays = [] + for lev, lab in zip(idx.levels, idx.labels): + arrays.append(np.asarray(lev).take(lab)) + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], + ['a', 'b']]) + assert result.levels[0].equals(Index([Timestamp('20130101')])) + assert result.levels[1].equals(Index(['a', 'b'])) + + +def test_from_arrays_iterator(idx): + # GH 18434 + arrays = [] + for lev, lab in zip(idx.levels, idx.labels): + arrays.append(np.asarray(lev).take(lab)) + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) + + # invalid iterator input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of array-likes."): + MultiIndex.from_arrays(0) + + +def test_from_arrays_index_series_datetimetz(): + idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, + tz='US/Eastern') + idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3, + tz='Asia/Tokyo') + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, 
result2) + + +def test_from_arrays_index_series_timedelta(): + idx1 = pd.timedelta_range('1 days', freq='D', periods=3) + idx2 = pd.timedelta_range('2 hours', freq='H', periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_period(): + idx1 = pd.period_range('2011-01-01', freq='D', periods=3) + idx2 = pd.period_range('2015-01-01', freq='H', periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, + tz='US/Eastern') + idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3) + idx3 = pd.timedelta_range('1 days', freq='D', periods=3) + idx4 = pd.period_range('2011-01-01', freq='D', periods=3) + + result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), + pd.Series(idx2), + pd.Series(idx3), + pd.Series(idx4)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=True) + + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels + with tm.assert_raises_regex( + ValueError, "Must pass non-zero number of levels/labels"): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) + expected = Index([], name='A') + tm.assert_index_equal(result.levels[0], expected) + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list('ABC')[:N] + result = MultiIndex.from_arrays(arrays=arrays, names=names) + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, + names=names) + 
tm.assert_index_equal(result, expected) + + +def test_from_arrays_invalid_input(): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + for i in invalid_inputs: + pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) + + +def test_from_arrays_different_lengths(): + # see gh-13599 + idx1 = [1, 2, 3] + idx2 = ['a', 'b'] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + idx1 = [] + idx2 = ['a', 'b'] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + idx1 = [1, 2, 3] + idx2 = [] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + +def test_from_tuples(): + tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' + 'from empty list', + MultiIndex.from_tuples, []) + + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + # input non-iterables + with tm.assert_raises_regex( + TypeError, 'Input must be a list / sequence of tuple-likes.'): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=['a', 'b']) + expected = MultiIndex.from_arrays(arrays=[[], []], + names=['a', 'b']) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() + + +def test_from_product_empty(): + # 0 levels + with tm.assert_raises_regex( + ValueError, "Must pass non-zero number of levels/labels"): + MultiIndex.from_product([]) + + # 1 level + result = MultiIndex.from_product([[]], names=['A']) + expected = pd.Index([], name='A') + tm.assert_index_equal(result.levels[0], expected) + + # 2 levels + l1 = [[], ['foo', 'bar', 'baz'], []] + l2 = [[], [], ['a', 'b', 'c']] + names = ['A', 'B'] + for first, second in zip(l1, l2): + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], + labels=[[], []], names=names) + tm.assert_index_equal(result, expected) + + # GH12258 + names = ['A', 'B', 'C'] + for N in range(4): + lvl2 = lrange(N) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], + labels=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +def test_from_product_invalid_input(): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + for i in invalid_inputs: + pytest.raises(TypeError, MultiIndex.from_product, iterables=i) + + +def test_from_product_datetimeindex(): + dt_index = date_range('2000-01-01', periods=2) + mi = pd.MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( + '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( + '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) + tm.assert_numpy_array_equal(mi.values, etalon) + + +def test_from_product_index_series_categorical(): + # GH13743 
+ first = ['foo', 'bar'] + for ordered in [False, True]: + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=ordered) + expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), + categories=list("bac"), + ordered=ordered) + + for arr in [idx, pd.Series(idx), idx.values]: + result = pd.MultiIndex.from_product([first, arr]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of iterables."): + MultiIndex.from_product(0) + + +def test_create_index_existing_name(idx): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = idx + index.names = ['foo', 'bar'] + result = pd.Index(index) + tm.assert_index_equal( + result, Index(Index([('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object'), + names=['foo', 'bar'])) + + result = pd.Index(index, names=['A', 'B']) + tm.assert_index_equal( + result, + Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], + dtype='object'), names=['A', 'B'])) + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + with pytest.raises(ValueError): + pd.Index(li, name='abc') + with pytest.raises(ValueError): + pd.Index(li, name='a') diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py new file mode 100644 index 0000000000000..aaed4467816da --- /dev/null +++ b/pandas/tests/indexes/multi/test_contains.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import MultiIndex +from pandas.compat import PYPY + + +def test_contains_top_level(): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + assert 'A' in midx + assert 'A' not in midx._engine + + +def test_contains_with_nat(): + # MI with a NaT + mi = MultiIndex(levels=[['C'], + pd.date_range('2012-01-01', periods=5)], + labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, 'B']) + assert ('C', pd.Timestamp('2012-01-01')) in mi + for val in mi.values: + assert val in mi + + +def test_contains(idx): + assert ('foo', 'two') in idx + assert ('bar', 'two') not in idx + assert None not in idx + + +@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + 
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + + +def test_isin(): + values = [('foo', 2), ('bar', 3), ('quux', 4)] + + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + assert len(result) == 0 + assert result.dtype == np.bool_ + + +@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_not_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, False])) + + +def test_isin_level_kwarg(): + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + + vals_0 = ['foo', 'bar', 'quux'] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + pytest.raises(IndexError, idx.isin, vals_0, level=5) + pytest.raises(IndexError, idx.isin, vals_0, level=-5) + + pytest.raises(KeyError, idx.isin, vals_0, level=1.0) + pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) + pytest.raises(KeyError, idx.isin, vals_1, level='A') + + idx.names = ['A', 'B'] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) + + pytest.raises(KeyError, idx.isin, vals_1, level='C') diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py new file mode 100644 index 0000000000000..ff99941ba9948 --- /dev/null +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import DataFrame, MultiIndex, date_range +from pandas.compat import PY3, range +from pandas.util.testing import assert_almost_equal + + +def test_tolist(idx): + result = idx.tolist() + exp = list(idx.values) + assert result == exp + + +def test_to_frame(): + tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] + + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] + index = MultiIndex.from_tuples(tuples, names=['first', 'second']) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([range(5), + pd.date_range('20130101', periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + {0: np.repeat(np.arange(5, dtype='int64'), 3), + 1: np.tile(pd.date_range('20130101', periods=3), 5)}) + 
tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([range(5), + pd.date_range('20130101', periods=3)]) + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_hierarchical(): + index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two')]) + result = index.to_hierarchical(3) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + # K > 1 + result = index.to_hierarchical(3, 2) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + # non-sorted + index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), + (2, 'a'), (2, 'b')], + names=['N1', 'N2']) + + result = index.to_hierarchical(2) + expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), + (1, 'b'), + (2, 'a'), (2, 'a'), + (2, 'b'), (2, 'b')], + names=['N1', 'N2']) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + +@pytest.mark.skipif(PY3, reason="testing legacy pickles not support on py3") +def test_legacy_pickle(datapath): + + path = datapath('indexes', 'multi', 'data', 'multiindex_v1.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + assert obj.equals(obj2) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj), dtype=np.intp) + assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + +def test_legacy_v2_unpickle(datapath): + + # 0.7.3 -> 0.8.0 format manage + path = datapath('indexes', 'multi', 'data', 'mindex_073.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + assert obj.equals(obj2) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj), dtype=np.intp) + assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + +def test_roundtrip_pickle_with_tz(): + + # GH 8367 + # round-trip of timezone + index = MultiIndex.from_product( + [[1, 2], ['a', 'b'], date_range('20130101', periods=3, + tz='US/Eastern') + ], names=['one', 'two', 'three']) + unpickled = tm.round_trip_pickle(index) + assert index.equal_levels(unpickled) + + +def test_pickle(indices): + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + original_name, indices.name = indices.name, 'foo' + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + indices.name = original_name + + +def test_to_series(idx): + # assert that we are creating a copy of the index + + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + + +def test_to_series_with_arguments(idx): + # GH18699 + + # index kwarg + s = idx.to_series(index=idx) + + assert s.values is not idx.values + assert s.index is idx + assert s.name == idx.name + + # name kwarg + idx = idx + s = idx.to_series(name='__test') + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name diff --git a/pandas/tests/indexes/multi/test_copy.py 
b/pandas/tests/indexes/multi/test_copy.py new file mode 100644 index 0000000000000..282f2fa84efe0 --- /dev/null +++ b/pandas/tests/indexes/multi/test_copy.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +from copy import copy, deepcopy + +import pandas.util.testing as tm +from pandas import (CategoricalIndex, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, compat) + + +def assert_multiindex_copied(copy, original): + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.labels, original.labels) + + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.labels, original.labels) + assert copy.labels is not original.labels + + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names + + # Sort order should be copied + assert copy.sortorder == original.sortorder + + +def test_copy(idx): + i_copy = idx.copy() + + assert_multiindex_copied(i_copy, idx) + + +def test_shallow_copy(idx): + i_copy = idx._shallow_copy() + + assert_multiindex_copied(i_copy, idx) + + +def test_view(idx): + i_view = idx.view() + assert_multiindex_copied(i_view, idx) + + +def test_copy_name(idx): + # gh-12309: Check that the "name" argument + # passed at initialization is honored. + + # TODO: Remove or refactor MultiIndex not tested. + for name, index in compat.iteritems({'idx': idx}): + if isinstance(index, MultiIndex): + continue + + first = index.__class__(index, copy=True, name='mario') + second = first.__class__(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + + # Not using tm.assert_index_equal() since names differ. + assert index.equals(first) + + assert first.name == 'mario' + assert second.name == 'mario' + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + if not isinstance(index, CategoricalIndex): + # See gh-13365 + s3 = s1 * s2 + assert s3.index.name == 'mario' + + +def test_ensure_copied_data(idx): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + # TODO: REMOVE THIS TEST. MultiIndex is tested seperately as noted below. 
+ + for name, index in compat.iteritems({'idx': idx}): + init_kwargs = {} + if isinstance(index, PeriodIndex): + # Needs "freq" specification: + init_kwargs['freq'] = index.freq + elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + continue + + index_type = index.__class__ + result = index_type(index.values, copy=True, **init_kwargs) + tm.assert_index_equal(index, result) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='copy') + + if isinstance(index, PeriodIndex): + # .values an object array of Period, thus copied + result = index_type(ordinal=index.asi8, copy=False, + **init_kwargs) + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, + check_same='same') + elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, + check_same='same') + + +def test_copy_and_deepcopy(indices): + + if isinstance(indices, MultiIndex): + return + for func in (copy, deepcopy): + idx_copy = func(indices) + assert idx_copy is not indices + assert idx_copy.equals(indices) + + new_copy = indices.copy(deep=True, name="banana") + assert new_copy.name == "banana" diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py new file mode 100644 index 0000000000000..281db7fd2c8a7 --- /dev/null +++ b/pandas/tests/indexes/multi/test_drop.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Index, MultiIndex +from pandas.compat import lrange +from pandas.errors import PerformanceWarning + + +def test_drop(idx): + dropped = idx.drop([('foo', 'two'), ('qux', 'one')]) + + index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) + dropped2 = idx.drop(index) + + expected = idx[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = idx.drop(['bar']) + expected = idx[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop('foo') + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([('bar', 'two')]) + pytest.raises(KeyError, idx.drop, [('bar', 'two')]) + pytest.raises(KeyError, idx.drop, index) + pytest.raises(KeyError, idx.drop, ['foo', 'two']) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) + pytest.raises(KeyError, idx.drop, mixed_index) + + # error='ignore' + dropped = idx.drop(index, errors='ignore') + expected = idx[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(mixed_index, errors='ignore') + expected = idx[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(['foo', 'two'], errors='ignore') + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = idx.drop(['foo', ('qux', 'one')]) + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ['foo', ('qux', 'one'), 'two'] + pytest.raises(KeyError, idx.drop, mixed_index) + dropped = idx.drop(mixed_index, 
errors='ignore') + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(idx): + index = idx[idx.get_loc('foo')] + dropped = index.droplevel(0) + assert dropped.name == 'second' + + index = MultiIndex( + levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], + labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + dropped = index.droplevel(0) + assert dropped.names == ('two', 'three') + + dropped = index.droplevel('two') + expected = index.droplevel(1) + assert dropped.equals(expected) + + +def test_droplevel_list(): + index = MultiIndex( + levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], + labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + + dropped = index[:2].droplevel(['three', 'one']) + expected = index[:2].droplevel(2).droplevel(0) + assert dropped.equals(expected) + + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + with pytest.raises(ValueError): + index[:2].droplevel(['one', 'two', 'three']) + + with pytest.raises(KeyError): + index[:2].droplevel(['one', 'four']) + + +def test_drop_not_lexsorted(): + # GH 12078 + + # define the lexsorted version of the multi-index + tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] + lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) + assert lexsorted_mi.is_lexsorted() + + # and the not-lexsorted version + df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], + data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]]) + df = df.pivot_table(index='a', columns=['b', 'c'], values='d') + df = df.reset_index() + not_lexsorted_mi = df.columns + assert not not_lexsorted_mi.is_lexsorted() + + # compare the results + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop('a'), + not_lexsorted_mi.drop('a')) diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py new file mode 100644 index 0000000000000..0bebe3165e2e8 --- /dev/null +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +from pandas import Index, MultiIndex, RangeIndex, Series, compat +from pandas.compat import lrange, lzip, range + + +def test_equals(idx): + # TODO: Remove or Refactor. MultiIndex not tested. 
+ for name, idx in compat.iteritems({'idx': idx}): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(idx, RangeIndex): + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(pd.Series(idx)) + + +def test_equals_op(idx): + # GH9947, GH10637 + index_a = idx + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with tm.assert_raises_regex(ValueError, msg): + series_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_equals_multi(idx): + assert idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) + + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) + + # different number of levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(lrange(4)) + minor_axis = 
Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3]) + minor_labels = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not idx.equals(index) + assert not idx.equal_levels(index) + + # some of the labels are different + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not idx.equals(index) + + +def test_identical(idx): + mi = idx.copy() + mi2 = idx.copy() + assert mi.identical(mi2) + + mi = mi.set_names(['new1', 'new2']) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(['new1', 'new2']) + assert mi.identical(mi2) + + mi3 = Index(mi.tolist(), names=mi.names) + mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) + assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_equals_operator(idx): + # GH9785 + assert (idx == idx).all() + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result + + +def test_is_(): + mi = MultiIndex.from_tuples(lzip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert mi.is_(mi.set_names(["C", "D"])) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + assert mi.is_(mi2) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([lrange(10), lrange(10)]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + mi4.set_levels([lrange(10), lrange(10)], inplace=True) + assert not mi4.is_(mi3) + mi5 = mi.view() + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_is_all_dates(idx): + assert not idx.is_all_dates + + +def test_is_numeric(idx): + # MultiIndex is never numeric + assert not idx.is_numeric() + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = pd.MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py new file mode 100644 index 0000000000000..21e8a199cadd9 --- /dev/null +++ b/pandas/tests/indexes/multi/test_format.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- + + +import warnings + +import pandas as pd +import pandas.util.testing as tm +from pandas import MultiIndex, compat +from pandas.compat import PY3, range, u + + +def test_dtype_str(indices): + dtype = indices.dtype_str + assert isinstance(dtype, compat.string_types) + assert dtype == str(indices.dtype) + + +def 
test_format(idx): + idx.format() + idx[:0].format() + + +def test_format_integer_names(): + index = MultiIndex(levels=[[0, 1], [0, 1]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) + index.format(names=True) + + +def test_format_sparse_config(idx): + warn_filters = warnings.filters + warnings.filterwarnings('ignore', category=FutureWarning, + module=".*format") + # GH1538 + pd.set_option('display.multi_sparse', False) + + result = idx.format() + assert result[1] == 'foo two' + + tm.reset_display_options() + + warnings.filters = warn_filters + + +def test_format_sparse_display(): + index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], + labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) + + result = index.format() + assert result[3] == '1 0 0 0' + + +def test_repr_with_unicode_data(): + with pd.core.config.option_context("display.encoding", 'UTF-8'): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\u" not in repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip(): + + mi = MultiIndex.from_product([list('ab'), range(3)], + names=['first', 'second']) + str(mi) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' + + mi_u = MultiIndex.from_product( + [list(u'ab'), range(3)], names=['first', 'second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + # formatting + if PY3: + str(mi) + else: + compat.text_type(mi) + + # long format + mi = MultiIndex.from_product([list('abcdefg'), range(10)], + names=['first', 'second']) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' + + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + +def test_str(): + # tested elsewhere + pass + + +def test_unicode_string_with_unicode(): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + str(idx) + else: + compat.text_type(idx) + + +def test_bytestring_with_unicode(): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + bytes(idx) + else: + str(idx) + + +def test_repr_max_seq_item_setting(idx): + # GH10182 + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert '...' 
not in str(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py new file mode 100644 index 0000000000000..56fd4c04cb96e --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -0,0 +1,423 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import CategoricalIndex, Index, MultiIndex +from pandas.compat import range + + +def test_get_level_number_integer(idx): + idx.names = [1, 0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + pytest.raises(IndexError, idx._get_level_number, 2) + tm.assert_raises_regex(KeyError, 'Level fourth not found', + idx._get_level_number, 'fourth') + + +def test_get_level_values(idx): + result = idx.get_level_values(0) + expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], + name='first') + tm.assert_index_equal(result, expected) + assert result.name == 'first' + + result = idx.get_level_values('first') + expected = idx.get_level_values(0) + tm.assert_index_equal(result, expected) + + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(['A', 'B']), + CategoricalIndex([1, 2, 3])], + labels=[np.array([0, 0, 0, 1, 1, 1]), + np.array([0, 1, 2, 0, 1, 2])]) + + exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) + + +def test_get_value_duplicates(): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + + assert index.get_loc('D') == slice(0, 3) + with pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') + + +def test_get_level_values_all_na(): + # GH 17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_int_with_na(): + # GH 17924 + arrays = [['a', 'b', 'b'], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) + + arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1]) + tm.assert_index_equal(result, expected) + + arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) + + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([], dtype=object) + 
tm.assert_index_equal(result, expected) + + +def test_set_name_methods(idx, index_names): + # so long as these are synonyms, we don't need to test set_names + assert idx.rename == idx.set_names + new_names = [name + "SUFFIX" for name in index_names] + ind = idx.set_names(new_names) + assert idx.names == index_names + assert ind.names == new_names + with tm.assert_raises_regex(ValueError, "^Length"): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + assert res is None + assert ind.names == new_names2 + + # set names for specific level (# GH7792) + ind = idx.set_names(new_names[0], level=0) + assert idx.names == index_names + assert ind.names == [new_names[0], index_names[1]] + + res = ind.set_names(new_names2[0], level=0, inplace=True) + assert res is None + assert ind.names == [new_names2[0], index_names[1]] + + # set names for multiple levels + ind = idx.set_names(new_names, level=[0, 1]) + assert idx.names == index_names + assert ind.names == new_names + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + assert res is None + assert ind.names == new_names2 + + +def test_set_levels_labels_directly(idx): + # setting levels/labels directly raises AttributeError + + levels = idx.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + labels = idx.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + with pytest.raises(AttributeError): + idx.levels = new_levels + + with pytest.raises(AttributeError): + idx.labels = new_labels + + +def test_set_levels(idx): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is possible. 
+ levels = idx.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + # level changing [w/o mutation] + ind2 = idx.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = idx.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = idx.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels, level=[0, 1], + inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # illegal level changing should not change levels + # GH 13754 + original_index = idx.copy() + for inplace in [True, False]: + with tm.assert_raises_regex(ValueError, "^On"): + idx.set_levels(['c'], level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, + check_dtype=True) + + with tm.assert_raises_regex(ValueError, "^On"): + idx.set_labels([0, 1, 2, 3, 4, 5], level=0, + inplace=inplace) + assert_matching(idx.labels, original_index.labels, + check_dtype=True) + + with tm.assert_raises_regex(TypeError, "^Levels"): + idx.set_levels('c', level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, + check_dtype=True) + + with tm.assert_raises_regex(TypeError, "^Labels"): + idx.set_labels(1, level=0, inplace=inplace) + assert_matching(idx.labels, original_index.labels, + check_dtype=True) + + +def test_set_labels(idx): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is possible. 
+ labels = idx.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp, dtype=np.int8) + tm.assert_numpy_array_equal(act, exp) + + # label changing [w/o mutation] + ind2 = idx.set_labels(new_labels) + assert_matching(ind2.labels, new_labels) + assert_matching(idx.labels, labels) + + # label changing [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_labels(new_labels, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, new_labels) + + # label changing specific level [w/o mutation] + ind2 = idx.set_labels(new_labels[0], level=0) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(idx.labels, labels) + + ind2 = idx.set_labels(new_labels[1], level=1) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(idx.labels, labels) + + # label changing multiple levels [w/o mutation] + ind2 = idx.set_labels(new_labels, level=[0, 1]) + assert_matching(ind2.labels, new_labels) + assert_matching(idx.labels, labels) + + # label changing specific level [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(idx.labels, labels) + + ind2 = idx.copy() + inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(idx.labels, labels) + + # label changing multiple levels [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_labels(new_labels, level=[0, 1], + inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, new_labels) + assert_matching(idx.labels, labels) + + # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_labels = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_labels]) + + # [w/o mutation] + result = ind.set_labels(labels=new_labels, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_labels(labels=new_labels, level=1, inplace=True) + assert result.equals(expected) + + +def test_set_levels_labels_names_bad_input(idx): + levels, labels = idx.levels, idx.labels + names = idx.names + + with tm.assert_raises_regex(ValueError, 'Length of levels'): + idx.set_levels([levels[0]]) + + with tm.assert_raises_regex(ValueError, 'Length of labels'): + idx.set_labels([labels[0]]) + + with tm.assert_raises_regex(ValueError, 'Length of names'): + idx.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + idx.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + idx.set_labels(labels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list-like'): + idx.set_names(names[0]) + + # should have equal lengths + with tm.assert_raises_regex(TypeError, 
'list of lists-like'): + idx.set_levels(levels[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'list-like'): + idx.set_levels(levels, level=0) + + # should have equal lengths + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + idx.set_labels(labels[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'list-like'): + idx.set_labels(labels, level=0) + + # should have equal lengths + with tm.assert_raises_regex(ValueError, 'Length of names'): + idx.set_names(names[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'string'): + idx.set_names(names, level=0) + + +@pytest.mark.parametrize('inplace', [True, False]) +def test_set_names_with_nlevel_1(inplace): + # GH 21149 + # Ensure that .set_names for MultiIndex with + # nlevels == 1 does not raise any errors + expected = pd.MultiIndex(levels=[[0, 1]], + labels=[[0, 1]], + names=['first']) + m = pd.MultiIndex.from_product([[0, 1]]) + result = m.set_names('first', level=0, inplace=inplace) + + if inplace: + result = m + + tm.assert_index_equal(result, expected) + + +def test_set_levels_categorical(): + # GH13854 + index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) + for ordered in [False, True]: + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, 0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], + labels=index.labels) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex(list("bacb"), + categories=cidx.categories, + ordered=cidx.ordered) + tm.assert_index_equal(result_lvl, expected_lvl) + + +def test_set_value_keeps_names(): + # motivating example from #3742 + lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] + lev2 = ['1', '2', '3'] * 2 + idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) + df = pd.DataFrame( + np.random.randn(6, 4), + columns=['one', 'two', 'three', 'four'], + index=idx) + df = df.sort_index() + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') + df.at[('grethe', '4'), 'one'] = 99.34 + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py new file mode 100644 index 0000000000000..0b528541e5eb6 --- /dev/null +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- + + +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import (Categorical, CategoricalIndex, Index, IntervalIndex, + MultiIndex, date_range) +from pandas.compat import lrange +from pandas.core.indexes.base import InvalidIndexError +from pandas.util.testing import assert_almost_equal + + +def test_slice_locs_partial(idx): + sorted_idx, _ = idx.sortlevel(0) + + result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) + assert result == (1, 5) + + result = sorted_idx.slice_locs(None, ('qux', 'one')) + assert result == (0, 5) + + result = sorted_idx.slice_locs(('foo', 'two'), None) + assert result == (1, len(sorted_idx)) + + result = sorted_idx.slice_locs('bar', 'baz') + assert result == (2, 4) + + +def test_slice_locs(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice(*idx.slice_locs(df.index[5] 
+ timedelta(seconds=30), + df.index[15] - timedelta(seconds=30))) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + +def test_slice_locs_with_type_mismatch(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, (1, 3)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, + df.index[5] + timedelta( + seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + idx.slice_locs(df.index[1], (16, "a")) + + +def test_slice_locs_not_sorted(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " + "MultiIndex lexsort depth", + index.slice_locs, (1, 0, 1), (2, 1, 0)) + + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + +def test_slice_locs_not_contained(): + # some searchsorted action + + index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], + labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], + [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) + + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(1, 5) + assert result == (3, 6) + + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(2, 5) + assert result == (3, 6) + + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) + + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) + + +def test_insert_base(idx): + + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + +def test_putmask_with_wrong_mask(idx): + # GH18368 + + with pytest.raises(ValueError): + idx.putmask(np.ones(len(idx) + 1, np.bool), 1) + + with pytest.raises(ValueError): + idx.putmask(np.ones(len(idx) - 1, np.bool), 1) + + with pytest.raises(ValueError): + idx.putmask('foo', 1) + + +def test_get_indexer(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + 
assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='backfill') + assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(lrange(10) + lrange(10)) + idx2 = Index(lrange(20)) + + msg = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, msg): + idx1.get_indexer(idx2) + + +def test_get_indexer_nearest(): + midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='nearest') + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='pad', tolerance=2) + + +def test_getitem(idx): + # scalar + assert idx[2] == ('bar', 'one') + + # slice + result = idx[2:5] + expected = idx[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) + assert sorted_idx.get_loc('baz') == slice(3, 4) + assert sorted_idx.get_loc('foo') == slice(0, 2) + + +def test_get_indexer_consistency(idx): + # See GH 16819 + if isinstance(idx, IntervalIndex): + pass + + if idx.is_unique or isinstance(idx, CategoricalIndex): + indexer = idx.get_indexer(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = idx.get_indexer(idx[0:2]) + + indexer, _ = idx.get_indexer_non_unique(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + +def test_get_loc(idx): + assert idx.get_loc(('foo', 'two')) == 1 + assert idx.get_loc(('baz', 'two')) == 3 + pytest.raises(KeyError, idx.get_loc, ('bar', 'two')) + pytest.raises(KeyError, idx.get_loc, 'quux') + + pytest.raises(NotImplementedError, idx.get_loc, 'foo', + method='nearest') + + # 3 levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + pytest.raises(KeyError, index.get_loc, (1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + +def test_get_loc_duplicates(): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert rs == xp + + +def test_get_loc_level(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + 
loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + pytest.raises(KeyError, index.get_loc_level, (2, 2)) + + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + +@pytest.mark.parametrize('dtype1', [int, float, bool, str]) +@pytest.mark.parametrize('dtype2', [int, float, bool, str]) +def test_get_loc_multiple_dtypes(dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) +def test_get_loc_implicit_cast(level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_cast_bool(): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) + + +@pytest.mark.parametrize('level', [0, 1]) +def test_get_loc_nan(level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_missing_nan(): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + + +def test_get_indexer_categorical_time(): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [Categorical(['a', 'b', 'c']), + Categorical(date_range("2012-01-01", periods=3, freq='H'))]) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 0000000000000..7a8f8b60d31ba --- /dev/null +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,288 @@ +# -*- coding: utf-8 -*- + +import re + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import IntervalIndex, MultiIndex, RangeIndex +from pandas.compat import lrange, range +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +def test_labels_dtypes(): + + # GH 8456 + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + assert i.labels[0].dtype == 'int8' + assert i.labels[1].dtype == 'int8' + + i = MultiIndex.from_product([['a'], range(40)]) + assert i.labels[1].dtype == 'int8' + i = MultiIndex.from_product([['a'], range(400)]) + assert i.labels[1].dtype == 'int16' + i = 
MultiIndex.from_product([['a'], range(40000)]) + assert i.labels[1].dtype == 'int32' + + i = pd.MultiIndex.from_product([['a'], range(1000)]) + assert (i.labels[0] >= 0).all() + assert (i.labels[1] >= 0).all() + + +def test_values_boxed(): + tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), + (3, pd.Timestamp('2000-01-03')), + (1, pd.Timestamp('2000-01-04')), + (2, pd.Timestamp('2000-01-02')), + (3, pd.Timestamp('2000-01-03'))] + result = pd.MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10 ** 18, 10 ** 18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def test_consistency(): + # need to construct an overflow + major_axis = lrange(70000) + minor_axis = lrange(10) + + major_labels = np.arange(70000) + minor_labels = np.repeat(lrange(10), 7000) + + # the fact that it works means it's consistent + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + # inconsistent + major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + assert not index.is_unique + + +def test_hash_collisions(): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], + names=['one', 'two']) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange( + len(index), dtype='intp')) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_dims(): + pass + + +def test_take_invalid_kwargs(): + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is 
not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + +def test_isna_behavior(idx): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + with pytest.raises(NotImplementedError): + pd.isna(idx) + + +def test_large_multiindex_error(): + # GH12527 + df_below_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]), + columns=['dest']) + with pytest.raises(KeyError): + df_below_1000000.loc[(-1, 0), 'dest'] + with pytest.raises(KeyError): + df_below_1000000.loc[(3, 0), 'dest'] + df_above_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), + columns=['dest']) + with pytest.raises(KeyError): + df_above_1000000.loc[(-1, 0), 'dest'] + with pytest.raises(KeyError): + df_above_1000000.loc[(3, 0), 'dest'] + + +def test_million_record_attribute_error(): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame({'a': r, 'b': r}, + index=pd.MultiIndex.from_tuples([(x, x) for x in r])) + + with tm.assert_raises_regex(AttributeError, + "'Series' object has no attribute 'foo'"): + df['a'].foo() + + +def test_can_hold_identifiers(idx): + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(idx): + levels, labels = idx.levels, idx.labels + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile('does not support mutable operations') + with tm.assert_raises_regex(TypeError, mutable_regex): + levels[0] = levels[0] + with tm.assert_raises_regex(TypeError, mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with tm.assert_raises_regex(TypeError, mutable_regex): + labels[0] = labels[0] + with tm.assert_raises_regex(TypeError, mutable_regex): + labels[0][0] = labels[0][0] + # and for names + names = idx.names + with tm.assert_raises_regex(TypeError, mutable_regex): + names[0] = names[0] + + +def test_level_setting_resets_attributes(): + ind = pd.MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert ind.is_monotonic + ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) + # if this fails, probably didn't reset the cache correctly. 
+ assert not ind.is_monotonic + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + foo = pd.DataFrame(np.arange(100).reshape((10, 10))) + bar = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1) + df.index.names = ['fizz', 'buzz'] + + str(df) + expected = pd.DataFrame({'bar': np.arange(100), + 'foo': np.arange(100)}, + index=pd.MultiIndex.from_product( + [range(10), range(10)], + names=['fizz', 'buzz'])) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values('fizz') + expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10) + tm.assert_index_equal(result, expected) + + result = df.index.get_level_values('buzz') + expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz') + tm.assert_index_equal(result, expected) + + +def test_hash_error(indices): + index = indices + tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__, hash, indices) + + +def test_mutability(indices): + if not len(indices): + return + pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) + + +def test_wrong_number_names(indices): + def testit(ind): + ind.names = ["apple", "banana", "carrot"] + tm.assert_raises_regex(ValueError, "^Length", testit, indices) + + +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result + + if idx.inferred_type == 'object': + assert result3 > result2 + + else: + + # we report 0 for no-length + assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 0000000000000..4a386c6e8dbe4 --- /dev/null +++ b/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Index, MultiIndex + + +@pytest.mark.parametrize('other', + [Index(['three', 'one', 'two']), + Index(['one']), + Index(['one', 'three'])]) +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join(idx, how=join_type, + level='second', + return_indexers=True) + + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array( + [x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ('outer', 'inner'): + join_index2, ridx2, lidx2 = \ + idx.join(other, how=join_type, level='second', + return_indexers=True) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(idx): + # some corner cases + index = Index(['three', 'one', 'two']) + result = index.join(idx, level='second') + assert isinstance(result, MultiIndex) + + tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", + idx.join, idx, level=1) + + +def test_join_self(idx, join_type): + joined = idx.join(idx, how=join_type) + assert idx is joined + + +def test_join_multi(): + # GH 10665 + midx = 
pd.MultiIndex.from_product( + [np.arange(4), np.arange(4)], names=['a', 'b']) + idx = pd.Index([1, 2, 5], name='b') + + # inner + jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) + exp_idx = pd.MultiIndex.from_product( + [np.arange(4), [1, 2]], names=['a', 'b']) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) + exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, + 1, -1], dtype=np.intp) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_self_unique(idx, join_type): + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py new file mode 100644 index 0000000000000..01465ea4c2f3b --- /dev/null +++ b/pandas/tests/indexes/multi/test_missing.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index, isna +from pandas._libs.tslib import iNaT +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +def test_fillna(idx): + # GH 11343 + + # TODO: Remove or Refactor. 
Not Implemented for MultiIndex + for name, index in [('idx', idx), ]: + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with tm.assert_raises_regex(TypeError, msg): + idx.fillna([idx[0]]) + + idx = index.copy() + values = idx.values + + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans + + +def test_dropna(): + # GH 6194 + idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ['a', 'b', 'c', np.nan, 'e']]) + + exp = pd.MultiIndex.from_arrays([[1, 5], + [1, 5], + ['a', 'e']]) + tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how='any'), exp) + + exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], + [1, 2, np.nan, 5], + ['a', 'b', 'c', 'e']]) + tm.assert_index_equal(idx.dropna(how='all'), exp) + + msg = "invalid how option: xxx" + with tm.assert_raises_regex(ValueError, msg): + idx.dropna(how='xxx') + + +def test_nulls(idx): + # this is really a smoke test for the methods + # as these are adequately tested for functionality elsewhere + + # TODO: Remove or Refactor. MultiIndex not Implemented. 
+ for name, index in [('idx', idx), ]: + if len(index) == 0: + tm.assert_numpy_array_equal( + index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() + else: + + if not index.hasnans: + tm.assert_numpy_array_equal( + index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + +@pytest.mark.xfail +def test_hasnans_isnans(idx): + # GH 11343, added tests for hasnans / isnans + index = idx.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(index), dtype=bool) + tm.assert_numpy_array_equal(index._isnan, expected) + assert not index.hasnans + + index = idx.copy() + values = index.values + values[1] = np.nan + + index = idx.__class__(values) + + expected = np.array([False] * len(index), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 0000000000000..f02447e27ab81 --- /dev/null +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pytest +from pandas import Index, IntervalIndex, MultiIndex + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), + np.arange(10)], names=['one', 'two']) + assert i.is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + # string ordering + i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not 
i.is_monotonic + assert not Index(i.values).is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ['mom', 'next', 'zenith']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', + 'nl0000289783', + 'nl0000289965', 'nl0000301109']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product([np.arange(9, -1, -1), + np.arange(9, -1, -1)], + names=['one', 'two']) + assert i.is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']]) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + # string ordering + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['three', 'two', 'one']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not i.is_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['zenith', 'next', 'mom']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965', + 'nl0000289783', 'lu0197800237', + 'gb00b03mlx29']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing + assert 
Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + +def test_is_strictly_monotonic_increasing(): + idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_increasing + assert not idx._is_strictly_monotonic_increasing + + +def test_is_strictly_monotonic_decreasing(): + idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + +def test_searchsorted_monotonic(indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return + + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side='right') + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + else: + # non-monotonic should raise. 
+ with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py new file mode 100644 index 0000000000000..a9fbb55679173 --- /dev/null +++ b/pandas/tests/indexes/multi/test_names.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- + + +import pandas as pd +import pandas.util.testing as tm +from pandas import MultiIndex + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_slice_keep_name(): + x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], + names=['x', 'y']) + assert x[1:].names == x.names + + +def test_index_name_retained(): + # GH9857 + result = pd.DataFrame({'x': [1, 2, 6], + 'y': [2, 2, 8], + 'z': [-5, 0, 5]}) + result = result.set_index('z') + result.loc[10] = [9, 10] + df_expected = pd.DataFrame({'x': [1, 2, 6, 9], + 'y': [2, 2, 8, 10], + 'z': [-5, 0, 5, 10]}) + df_expected = df_expected.set_index('z') + tm.assert_frame_equal(result, df_expected) + + +def test_changing_names(idx): + + # names should be applied to levels + level_names = [level.name for level in idx.levels] + check_level_names(idx, idx.names) + + view = idx.view() + copy = idx.copy() + shallow_copy = idx._shallow_copy() + + # changing names should change level names on object + new_names = [name + "a" for name in idx.names] + idx.names = new_names + check_level_names(idx, new_names) + + # but not on copies + check_level_names(view, level_names) + check_level_names(copy, level_names) + check_level_names(shallow_copy, level_names) + + # and copies shouldn't change original + shallow_copy.names = [name + "c" for name in shallow_copy.names] + check_level_names(idx, new_names) + + +def test_take_preserve_name(idx): + taken = idx.take([3, 0, 1]) + assert taken.names == idx.names + + +def test_copy_names(): + # Check that adding a "names" parameter to the copy is honored + # GH14302 + multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) + multi_idx1 = multi_idx.copy() + + assert multi_idx.equals(multi_idx1) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx1.names == ['MyName1', 'MyName2'] + + multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) + + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx2.names == ['NewName1', 'NewName2'] + + multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) + + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx3.names == ['NewName1', 'NewName2'] + + +def test_names(idx, index_names): + + # names are assigned in setup + names = index_names + level_names = [level.name for level in idx.levels] + assert names == level_names + + # setting bad names on existing + index = idx + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", + list(index.names) + ["third"]) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = idx.levels + major_labels, minor_labels = idx.labels + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) + 
+ # names are assigned + index.names = ["a", "b"] + ind_names = list(index.names) + level_names = [level.name for level in index.levels] + assert ind_names == level_names diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py new file mode 100644 index 0000000000000..d38cb28039595 --- /dev/null +++ b/pandas/tests/indexes/multi/test_operations.py @@ -0,0 +1,448 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, MultiIndex, + PeriodIndex, TimedeltaIndex, UInt64Index, date_range, + period_range) +from pandas.compat import lrange, range +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.util.testing import assert_copy + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ('bar', 'two')) + assert new_index.equal_levels(idx) + assert new_index[0] == ('bar', 'two') + + # key not contained in all levels + new_index = idx.insert(0, ('abc', 'three')) + + exp0 = Index(list(idx.levels[0]) + ['abc'], name='first') + tm.assert_index_equal(new_index.levels[0], exp0) + + exp1 = Index(list(idx.levels[1]) + ['three'], name='second') + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ('abc', 'three') + + # key wrong length + msg = "Item must have length equal to number of levels" + with tm.assert_raises_regex(ValueError, msg): + idx.insert(0, ('foo2',)) + + left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], + columns=['1st', '2nd', '3rd']) + left.set_index(['1st', '2nd'], inplace=True) + ts = left['3rd'].copy(deep=True) + + left.loc[('b', 'x'), '3rd'] = 2 + left.loc[('b', 'a'), '3rd'] = -1 + left.loc[('b', 'b'), '3rd'] = 3 + left.loc[('a', 'x'), '3rd'] = 4 + left.loc[('a', 'w'), '3rd'] = 5 + left.loc[('a', 'a'), '3rd'] = 6 + + ts.loc[('b', 'x')] = 2 + ts.loc['b', 'a'] = -1 + ts.loc[('b', 'b')] = 3 + ts.loc['a', 'x'] = 4 + ts.loc[('a', 'w')] = 5 + ts.loc['a', 'a'] = 6 + + right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], + ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], + ['a', 'w', 5], ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) + right.set_index(['1st', '2nd'], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right['3rd']) + + # GH9250 + idx = [('test1', i) for i in range(5)] + \ + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] + + left = pd.Series(np.linspace(0, 10, 11), + pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[('test', 17)] = 11 + left.loc[('test', 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), + pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_bounds(idx): + idx._bounds + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.get_values().tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = 
idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + result = index.truncate(before=1) + assert 'foo' not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + + # after < before + pytest.raises(ValueError, index.truncate, 3, 1) + + +def test_where(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + + def f(): + i.where(True) + + pytest.raises(NotImplementedError, f) + + +def test_where_array_like(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + def f(): + return i.where(klass(cond)) + pytest.raises(NotImplementedError, f) + + +def test_reorder_levels(idx): + # this blows up + tm.assert_raises_regex(IndexError, '^Too many levels', + idx.reorder_levels, [2, 1, 0]) + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype('O') + assert_copy(actual.levels, expected.levels) + assert_copy(actual.labels, expected.labels) + check_level_names(actual, expected.names) + + with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize('ordered', [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = '> 1 ndim Categorical are not supported at this time' + with tm.assert_raises_regex(NotImplementedError, msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with tm.assert_raises_regex(NotImplementedError, msg): + idx.astype('category') + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + with tm.assert_produces_warning(FutureWarning): + result = m.repeat(n=reps) + tm.assert_index_equal(result, expected) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex( + ValueError, msg, np.repeat, m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range('2011-01-01', freq='M', periods=3, ) + dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') + pi = period_range('2011-01', freq='M', periods=3) + + mi = MultiIndex.from_arrays([[1, 2, 3], + [1.1, np.nan, 3.3], + ['a', 'b', 'c'], + dti, dti_tz, pi]) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 
'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z']]) + + res = mi.append(other) + exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) + tm.assert_index_equal(res, exp) + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + if not isinstance(idx, + (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + with pytest.raises(AttributeError): + idx.freq + + +def test_take_invalid_kwargs(idx): + idx = idx + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + +def test_take_fill_value(): + # GH 12631 + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + (np.nan, pd.NaT)] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +def test_iter(idx): + result = list(idx) + expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] + assert result == expected + + +def test_sub(idx): + + first = idx + + # - now raises (previously was set op difference) + with pytest.raises(TypeError): + first - idx[-3:] + with pytest.raises(TypeError): + idx[-3:] - first + with pytest.raises(TypeError): + idx[-3:] - first.tolist() + with pytest.raises(TypeError): + first.tolist() - idx[-3:] + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected) + + +def test_map(idx): + # callable + index = idx + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: 
{i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx)]) +def test_map_dictlike(idx, mapper): + + if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(idx))) + + identity = mapper(idx.values, idx) + + # we don't infer to UInt64 for a dict + if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict): + expected = idx.astype('int64') + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +def test_numpy_ufuncs(idx): + # test ufuncs of numpy 1.9.2. see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + # some functions are skipped because it may return different result + # for unicode input depending on numpy version + + for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + # PeriodIndex behavior should be changed in future version + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # coerces to float (e.g. np.sin) + with np.errstate(all='ignore'): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + + tm.assert_index_equal(result, exp) + assert isinstance(result, pd.Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + + for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + func(idx) diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py new file mode 100644 index 0000000000000..40e5e26e9cb0f --- /dev/null +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import DataFrame, MultiIndex, date_range + + +def test_partial_string_timestamp_multiindex(): + # GH10331 + dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H') + abc = ['a', 'b', 'c'] + ix = pd.MultiIndex.from_product([dr, abc]) + df = pd.DataFrame({'c1': range(0, 15)}, index=ix) + idx = pd.IndexSlice + + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + + # partial string matching on a single index + for df_swap in (df.swaplevel(), + df.swaplevel(0), + df.swaplevel(0, 1)): + df_swap = df_swap.sort_index() + just_a = df_swap.loc['a'] + result = just_a.loc['2016-01-01'] + expected = df.loc[idx[:, 'a'], :].iloc[0:2] + expected.index = 
expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + # indexing with IndexSlice + result = df.loc[idx['2016-01-01':'2016-02-01', :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # Even though this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. + with pytest.raises(KeyError): + df['2016-01-01'] + + # partial string match on year only + result = df.loc['2016'] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc['2016-01-01'] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc['2016-01-02 12'] + expected = df.iloc[9:12] + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[idx[:, '2016-01-02'], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + result = df.loc[('2016-01-01', 'a'), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) + with pytest.raises(KeyError): + df_swap.loc['2016-01-01'] + + # GH12685 (partial string with daily resolution or below) + dr = date_range('2013-01-01', periods=100, freq='D') + ix = MultiIndex.from_product([dr, ['a', 'b']]) + df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix) + + result = df.loc[idx['2013-03':'2013-03', :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 0000000000000..346b23fed7075 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +from pandas import Index, MultiIndex + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) + assert isinstance(result, MultiIndex) + check_level_names(result, idx[:4].names) + + result, indexer = idx.reindex(list(idx)) + assert isinstance(result, MultiIndex) + assert indexer is None + check_level_names(result, idx.names) + + +def test_reindex_level(idx): + index = Index(['one']) + + target, indexer = idx.reindex(index, level='second') + target2, indexer2 = index.reindex(idx, level='second') + + exp_index = idx.join(index, level='second', how='right') + exp_index2 = idx.join(index, level='second', how='left') + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) + + tm.assert_raises_regex(TypeError, "Fill method not supported", + idx.reindex, idx, + method='pad', level='second') + + tm.assert_raises_regex(TypeError, "Fill method not supported", + index.reindex, index, method='bfill', + level='first') + + 
+def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): + # GH6552 + idx = idx.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ['foo', 'bar'] + assert idx.reindex([])[0].names == ['foo', 'bar'] + assert idx.reindex(np.array([]))[0].names == ['foo', 'bar'] + assert idx.reindex(target.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(target.values)[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar'] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + names=['foo', 'bar']) + assert idx.reindex([], level=0)[0].names == ['foo', 'bar'] + assert idx.reindex([], level=1)[0].names == ['foo', 'bar'] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ + + +def test_reindex_base(idx): + idx = idx + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with tm.assert_raises_regex(ValueError, 'Invalid fill method'): + idx.get_indexer(idx, method='invalid') diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py new file mode 100644 index 0000000000000..79a3837aac7f8 --- /dev/null +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +from pandas import (CategoricalIndex, DatetimeIndex, MultiIndex, PeriodIndex, + Series, TimedeltaIndex) + + +def test_setops_errorcases(idx): + # # non-iterable input + cases = [0.5, 'xxx'] + methods = [idx.intersection, idx.union, idx.difference, + idx.symmetric_difference] + + for method in methods: + for case in cases: + tm.assert_raises_regex(TypeError, + "Input must be Index " + "or array-like", + method, case) + + +def test_intersection_base(idx): + first = idx[:5] + second = idx[:3] + intersect = first.intersection(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.intersection(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.intersection([1, 2, 3]) + + +def test_union_base(idx): + first = idx[3:] + second = idx[:5] + 
everything = idx + union = first.union(second) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.union(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.union(case) + assert tm.equalContents(result, everything) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.union([1, 2, 3]) + + +def test_difference_base(idx): + first = idx[2:] + second = idx[:4] + answer = idx[4:] + result = first.difference(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.difference(case) + elif isinstance(idx, CategoricalIndex): + pass + elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + assert result.__class__ == answer.__class__ + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) + else: + result = first.difference(case) + assert tm.equalContents(result, answer) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.difference([1, 2, 3]) + + +def test_symmetric_difference(idx): + first = idx[1:] + second = idx[:-1] + if isinstance(idx, CategoricalIndex): + pass + else: + answer = idx[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.symmetric_difference(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + first.symmetric_difference([1, 2, 3]) + + +def test_empty(idx): + # GH 15270 + assert not idx.empty + assert idx[:0].empty + + +def test_difference(idx): + + first = idx + result = first.difference(idx[-3:]) + expected = MultiIndex.from_tuples(sorted(idx[:-3].values), + sortorder=0, + names=idx.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: reflexive + result = idx.difference(idx) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: superset + result = idx[-3:].difference(idx) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: degenerate + result = idx[:0].difference(idx) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # names not the same + chunklet = idx[-3:] + chunklet.names = ['foo', 'baz'] + result = first.difference(chunklet) + assert result.names == (None, None) 
+ + # empty, but non-equal + result = idx.difference(idx.sortlevel(1)[0]) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([]) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([('foo', 'one')]) + expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( + 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) + expected.names = first.names + assert first.names == result.names + tm.assert_raises_regex(TypeError, "other must be a MultiIndex " + "or a list of tuples", + first.difference, [1, 2, 3, 4, 5]) + + +def test_union(idx): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_union = piece1 | piece2 + + tups = sorted(idx.values) + expected = MultiIndex.from_tuples(tups) + + assert the_union.equals(expected) + + # corner case, pass self or empty thing: + the_union = idx.union(idx) + assert the_union is idx + + the_union = idx.union(idx[:0]) + assert the_union is idx + + # won't work in python 3 + # tuples = _index.values + # result = _index[:4] | tuples[4:] + # assert result.equals(tuples) + + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) + + # result = other.union(idx) + # assert ('foo', 'one') in result + # assert 'B' in result + + # result2 = _index.union(other) + # assert result.equals(result2) + + +def test_intersection(idx): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_int = piece1 & piece2 + tups = sorted(idx[3:5].values) + expected = MultiIndex.from_tuples(tups) + assert the_int.equals(expected) + + # corner case, pass self + the_int = idx.intersection(idx) + assert the_int is idx + + # empty intersection: disjoint + empty = idx[:2] & idx[2:] + expected = idx[:0] + assert empty.equals(expected) + + # can't do in python 3 + # tuples = _index.values + # result = _index & tuples + # assert result.equals(tuples) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 0000000000000..d6165c17c6717 --- /dev/null +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex +from pandas.compat import lrange +from pandas.errors import PerformanceWarning, UnsortedIndexError + + +def test_sortlevel(idx): + import random + + tuples = list(idx) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), + ('foo', 'one'), ('baz', 
'two'), ('qux', 'one')] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sort(indices): + pytest.raises(TypeError, indices.sort) + + +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, + np.argsort, idx, axis=1) + + msg = "the 'kind' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + idx, kind='mergesort') + + msg = "the 'order' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + idx, order=('a', 'b')) + + +def test_unsortedindex(): + # GH 11897 + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, + columns=['one', 'two']) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + with pytest.raises(UnsortedIndexError): + df.loc(axis=0)['z', slice('a')] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)['z', :]) == 2 + + with pytest.raises(KeyError): + df.loc(axis=0)['q', :] + + +def test_unsortedindex_doc_examples(): + # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + + dfm = dfm.set_index(['jim', 'joe']) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, 'z')] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, 'y'):(1, 'z')] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, 'z')] + dfm.loc[(0, 'y'):(1, 'z')] + + assert dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 2 + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not 
recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame([['deleteMe', 1, 9], + ['keepMe', 2, 9], + ['keepMeToo', 3, 9]], + columns=['first', 'second', 'third']) + df2 = df.set_index(['first', 'second'], drop=False) + df2 = df2[df2['first'] != 'deleteMe'] + + # removed levels are there + expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], + [1, 2, 3]], + labels=[[1, 2], [1, 2]], + names=['first', 'second']) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], + [2, 3]], + labels=[[0, 1], [0, 1]], + names=['first', 'second']) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize('first_type,second_type', [ + ('int64', 'int64'), + ('datetime64[D]', 'str')]) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame(dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 1 << 10, size).astype(second_type), + third=rng.rand(size))) + df = df.groupby(['first', 'second']).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(['first', 'second']).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('level0', [['a', 'd', 'b'], + ['a', 'd', 'b', 'unused']]) +@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'], + ['w', 'x', 'y', 'z', 'unused']]) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = pd.MultiIndex(levels=[level0, level1], + labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert('unused' not in result.levels[level]) diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py new file mode 100644 index 0000000000000..a97d84ace9602 --- /dev/null +++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- + +import warnings +from itertools import product + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import MultiIndex +from pandas.compat import range, u + + +@pytest.mark.parametrize('names', [None, ['first', 'second']]) +def test_unique(names): + mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], + names=names) + + res = mi.unique() + exp = 
pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], + names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) + tm.assert_index_equal(res, exp) + + # GH #20568 - empty MI + mi = pd.MultiIndex.from_arrays([[], []], names=names) + res = mi.unique() + tm.assert_index_equal(mi, res) + + +def test_unique_datetimelike(): + idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', + '2015-01-01', 'NaT', 'NaT']) + idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', + '2015-01-02', 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() + + eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) + eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', + 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) + tm.assert_index_equal(result, exp) + + +@pytest.mark.parametrize('level', [0, 'first', 1, 'second']) +def test_unique_level(idx, level): + # GH #17896 - with level= argument + result = idx.unique(level=level) + expected = idx.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], + names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + # With empty MI + mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + + +def test_duplicate_multiindex_labels(): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + with pytest.raises(ValueError): + ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], + [1, 2, 1, 2, 3]]) + with pytest.raises(ValueError): + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) + + +@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], + [1, 'a', 1]]) +def test_duplicate_level_names(names): + # GH18872, GH19029 + mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names) + assert mi.names == names + + # With .rename() + mi = pd.MultiIndex.from_product([[0, 1]] * 3) + mi = mi.rename(names) + assert mi.names == names + + # With .rename(., level=) + mi.rename(names[1], level=1, inplace=True) + mi = mi.rename([names[0], names[2]], level=[0, 2]) + assert mi.names == names + + +def test_duplicate_meta_data(): + # GH 10115 + index = MultiIndex( + levels=[[0, 1], [0, 1, 2]], + labels=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) + + for idx in [index, + index.set_names([None, None]), + index.set_names([None, 'Num']), + index.set_names(['Upper', 'Num']), ]: + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names + + +def test_duplicates(idx): + assert not idx.has_duplicates + assert idx.append(idx).has_duplicates + + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) + assert index.has_duplicates + + # GH 9075 + t = [(u('x'), u('out'), 
u('z'), 5, u('y'), u('in'), u('z'), 169), + (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), + (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), + (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), + (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), + (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), + (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), + (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), + (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), + (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), + (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), + (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), + (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), + (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), + (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), + (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), + (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), + (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] + + index = pd.MultiIndex.from_tuples(t) + assert not index.has_duplicates + + # handle int64 overflow if possible + def check(nlevels, with_nulls): + labels = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject some null values + labels[500] = -1 # common nan value + labels = [labels.copy() for i in range(nlevels)] + for i in range(nlevels): + labels[i][500 + i - nlevels // 2] = -1 + + labels += [np.array([-1, 1]).repeat(500)] + else: + labels = [labels] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + index = MultiIndex(levels=levels, labels=labels) + assert not index.has_duplicates + + # with a dup + if with_nulls: + def f(a): + return np.insert(a, 1000, a[0]) + labels = list(map(f, labels)) + index = MultiIndex(levels=levels, labels=labels) + else: + values = index.values.tolist() + index = MultiIndex.from_tuples(values + [values[0]]) + + assert index.has_duplicates + + # no overflow + check(4, False) + check(4, True) + + # overflow possible + check(8, False) + check(8, True) + + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + labels = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, labels=labels) + + for keep in ['first', 'last', False]: + left = mi.duplicated(keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) + tm.assert_numpy_array_equal(left, right) + + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + 2, dtype='bool')) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + lab = product(range(-1, n), range(-1, m)) + mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], + labels=np.random.permutation(list(lab)).T) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + len(mi), dtype='bool')) + + +def 
test_get_unique_index(idx): + idx = idx[[0, 1, 0, 1, 1, 0, 0]] + expected = idx._shallow_copy(idx[[0, 1]]) + + for dropna in [False, True]: + result = idx._get_unique_index(dropna=dropna) + assert result.unique + tm.assert_index_equal(result, expected) + + +def test_unique_na(): + idx = pd.Index([2, np.nan, 2, 1], name='my_index') + expected = pd.Index([2, np.nan, 1], name='my_index') + result = idx.unique() + tm.assert_index_equal(result, expected) + + +def test_duplicate_level_names_access_raises(idx): + idx.names = ['foo', 'foo'] + tm.assert_raises_regex(KeyError, 'Level foo not found', + idx._get_level_number, 'foo') diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py deleted file mode 100644 index b1fb5f01862ae..0000000000000 --- a/pandas/tests/indexes/test_multi.py +++ /dev/null @@ -1,3342 +0,0 @@ -# -*- coding: utf-8 -*- - -import re -import warnings - -from datetime import timedelta -from itertools import product - -import pytest - -import numpy as np - -import pandas as pd - -from pandas import (CategoricalIndex, Categorical, DataFrame, Index, - MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY -from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -from pandas._libs.tslib import Timestamp - -import pandas.util.testing as tm - -from pandas.util.testing import assert_almost_equal, assert_copy - -from .common import Base - - -class TestMultiIndex(Base): - _holder = MultiIndex - _compat_props = ['shape', 'ndim', 'size'] - - def setup_method(self, method): - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - self.index_names = ['first', 'second'] - self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels - ], names=self.index_names, - verify_integrity=False)) - self.setup_indices() - - def create_index(self): - return self.index - - def test_can_hold_identifiers(self): - idx = self.create_index() - key = idx[0] - assert idx._can_hold_identifiers_and_holds_name(key) is True - - def test_boolean_context_compat2(self): - - # boolean context compat - # GH7897 - i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) - common = i1.intersection(i2) - - def f(): - if common: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) - - def test_labels_dtypes(self): - - # GH 8456 - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - assert i.labels[0].dtype == 'int8' - assert i.labels[1].dtype == 'int8' - - i = MultiIndex.from_product([['a'], range(40)]) - assert i.labels[1].dtype == 'int8' - i = MultiIndex.from_product([['a'], range(400)]) - assert i.labels[1].dtype == 'int16' - i = MultiIndex.from_product([['a'], range(40000)]) - assert i.labels[1].dtype == 'int32' - - i = pd.MultiIndex.from_product([['a'], range(1000)]) - assert (i.labels[0] >= 0).all() - assert (i.labels[1] >= 0).all() - - def test_where(self): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - - def f(): - i.where(True) - - pytest.raises(NotImplementedError, f) - - def test_where_array_like(self): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - klasses = [list, tuple, np.array, 
pd.Series] - cond = [False, True] - - for klass in klasses: - def f(): - return i.where(klass(cond)) - pytest.raises(NotImplementedError, f) - - def test_repeat(self): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, names.repeat(reps)], names=names) - tm.assert_index_equal(m.repeat(reps), expected) - - with tm.assert_produces_warning(FutureWarning): - result = m.repeat(n=reps) - tm.assert_index_equal(result, expected) - - def test_numpy_repeat(self): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, names.repeat(reps)], names=names) - tm.assert_index_equal(np.repeat(m, reps), expected) - - msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, m, reps, axis=1) - - def test_set_name_methods(self): - # so long as these are synonyms, we don't need to test set_names - assert self.index.rename == self.index.set_names - new_names = [name + "SUFFIX" for name in self.index_names] - ind = self.index.set_names(new_names) - assert self.index.names == self.index_names - assert ind.names == new_names - with tm.assert_raises_regex(ValueError, "^Length"): - ind.set_names(new_names + new_names) - new_names2 = [name + "SUFFIX2" for name in new_names] - res = ind.set_names(new_names2, inplace=True) - assert res is None - assert ind.names == new_names2 - - # set names for specific level (# GH7792) - ind = self.index.set_names(new_names[0], level=0) - assert self.index.names == self.index_names - assert ind.names == [new_names[0], self.index_names[1]] - - res = ind.set_names(new_names2[0], level=0, inplace=True) - assert res is None - assert ind.names == [new_names2[0], self.index_names[1]] - - # set names for multiple levels - ind = self.index.set_names(new_names, level=[0, 1]) - assert self.index.names == self.index_names - assert ind.names == new_names - - res = ind.set_names(new_names2, level=[0, 1], inplace=True) - assert res is None - assert ind.names == new_names2 - - @pytest.mark.parametrize('inplace', [True, False]) - def test_set_names_with_nlevel_1(self, inplace): - # GH 21149 - # Ensure that .set_names for MultiIndex with - # nlevels == 1 does not raise any errors - expected = pd.MultiIndex(levels=[[0, 1]], - labels=[[0, 1]], - names=['first']) - m = pd.MultiIndex.from_product([[0, 1]]) - result = m.set_names('first', level=0, inplace=inplace) - - if inplace: - result = m - - tm.assert_index_equal(result, expected) - - def test_set_levels_labels_directly(self): - # setting levels/labels directly raises AttributeError - - levels = self.index.levels - new_levels = [[lev + 'a' for lev in level] for level in levels] - - labels = self.index.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - with pytest.raises(AttributeError): - self.index.levels = new_levels - - with pytest.raises(AttributeError): - self.index.labels = new_labels - - def test_set_levels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. 
- levels = self.index.levels - new_levels = [[lev + 'a' for lev in level] for level in levels] - - def assert_matching(actual, expected, check_dtype=False): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp) - tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) - - # level changing [w/o mutation] - ind2 = self.index.set_levels(new_levels) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, new_levels) - - # level changing specific level [w/o mutation] - ind2 = self.index.set_levels(new_levels[0], level=0) - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.set_levels(new_levels[1], level=1) - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/o mutation] - ind2 = self.index.set_levels(new_levels, level=[0, 1]) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing specific level [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, level=[0, 1], - inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # illegal level changing should not change levels - # GH 13754 - original_index = self.index.copy() - for inplace in [True, False]: - with tm.assert_raises_regex(ValueError, "^On"): - self.index.set_levels(['c'], level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels, - check_dtype=True) - - with tm.assert_raises_regex(ValueError, "^On"): - self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, - inplace=inplace) - assert_matching(self.index.labels, original_index.labels, - check_dtype=True) - - with tm.assert_raises_regex(TypeError, "^Levels"): - self.index.set_levels('c', level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels, - check_dtype=True) - - with tm.assert_raises_regex(TypeError, "^Labels"): - self.index.set_labels(1, level=0, inplace=inplace) - assert_matching(self.index.labels, original_index.labels, - check_dtype=True) - - def test_set_labels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. 
- labels = self.index.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - def assert_matching(actual, expected): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp, dtype=np.int8) - tm.assert_numpy_array_equal(act, exp) - - # label changing [w/o mutation] - ind2 = self.index.set_labels(new_labels) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, new_labels) - - # label changing specific level [w/o mutation] - ind2 = self.index.set_labels(new_labels[0], level=0) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.set_labels(new_labels[1], level=1) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(self.index.labels, labels) - - # label changing multiple levels [w/o mutation] - ind2 = self.index.set_labels(new_labels, level=[0, 1]) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing specific level [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(self.index.labels, labels) - - # label changing multiple levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], - inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing for levels of different magnitude of categories - ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) - new_labels = range(129, -1, -1) - expected = pd.MultiIndex.from_tuples( - [(0, i) for i in new_labels]) - - # [w/o mutation] - result = ind.set_labels(labels=new_labels, level=1) - assert result.equals(expected) - - # [w/ mutation] - result = ind.copy() - result.set_labels(labels=new_labels, level=1, inplace=True) - assert result.equals(expected) - - def test_set_levels_labels_names_bad_input(self): - levels, labels = self.index.levels, self.index.labels - names = self.index.names - - with tm.assert_raises_regex(ValueError, 'Length of levels'): - self.index.set_levels([levels[0]]) - - with tm.assert_raises_regex(ValueError, 'Length of labels'): - self.index.set_labels([labels[0]]) - - with tm.assert_raises_regex(ValueError, 'Length of names'): - self.index.set_names([names[0]]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_labels(labels[0]) - - # shouldn't scalar data error, instead should 
demand list-like - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_names(names[0]) - - # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_levels(levels, level=0) - - # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_labels(labels[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_labels(labels, level=0) - - # should have equal lengths - with tm.assert_raises_regex(ValueError, 'Length of names'): - self.index.set_names(names[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'string'): - self.index.set_names(names, level=0) - - def test_set_levels_categorical(self): - # GH13854 - index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) - for ordered in [False, True]: - cidx = CategoricalIndex(list("bac"), ordered=ordered) - result = index.set_levels(cidx, 0) - expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], - labels=index.labels) - tm.assert_index_equal(result, expected) - - result_lvl = result.get_level_values(0) - expected_lvl = CategoricalIndex(list("bacb"), - categories=cidx.categories, - ordered=cidx.ordered) - tm.assert_index_equal(result_lvl, expected_lvl) - - def test_metadata_immutable(self): - levels, labels = self.index.levels, self.index.labels - # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile('does not support mutable operations') - with tm.assert_raises_regex(TypeError, mutable_regex): - levels[0] = levels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): - levels[0][0] = levels[0][0] - # ditto for labels - with tm.assert_raises_regex(TypeError, mutable_regex): - labels[0] = labels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): - labels[0][0] = labels[0][0] - # and for names - names = self.index.names - with tm.assert_raises_regex(TypeError, mutable_regex): - names[0] = names[0] - - def test_inplace_mutation_resets_values(self): - levels = [['a', 'b', 'c'], [4]] - levels2 = [[1, 2, 3], ['a']] - labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] - - mi1 = MultiIndex(levels=levels, labels=labels) - mi2 = MultiIndex(levels=levels2, labels=labels) - vals = mi1.values.copy() - vals2 = mi2.values.copy() - - assert mi1._tuples is not None - - # Make sure level setting works - new_vals = mi1.set_levels(levels2).values - tm.assert_almost_equal(vals2, new_vals) - - # Non-inplace doesn't kill _tuples [implementation detail] - tm.assert_almost_equal(mi1._tuples, vals) - - # ...and values is still same too - tm.assert_almost_equal(mi1.values, vals) - - # Inplace should kill _tuples - mi1.set_levels(levels2, inplace=True) - tm.assert_almost_equal(mi1.values, vals2) - - # Make sure label setting works too - labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.empty((6,), dtype=object) - exp_values[:] = [(long(1), 'a')] * 6 - - # Must be 1d array of tuples - assert exp_values.shape == (6,) - new_values = mi2.set_labels(labels2).values - - # Not inplace shouldn't change - tm.assert_almost_equal(mi2._tuples, vals2) - - # Should have correct values - tm.assert_almost_equal(exp_values, new_values) - - # ...and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) - tm.assert_almost_equal(mi2.values, new_values) - - def test_copy_in_constructor(self): - levels = np.array(["a", "b", 
"c"]) - labels = np.array([1, 1, 2, 0, 0, 1, 1]) - val = labels[0] - mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], - copy=True) - assert mi.labels[0][0] == val - labels[0] = 15 - assert mi.labels[0][0] == val - val = levels[0] - levels[0] = "PANDA" - assert mi.levels[0][0] == val - - def test_set_value_keeps_names(self): - # motivating example from #3742 - lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] - lev2 = ['1', '2', '3'] * 2 - idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) - df = pd.DataFrame( - np.random.randn(6, 4), - columns=['one', 'two', 'three', 'four'], - index=idx) - df = df.sort_index() - assert df._is_copy is None - assert df.index.names == ('Name', 'Number') - df.at[('grethe', '4'), 'one'] = 99.34 - assert df._is_copy is None - assert df.index.names == ('Name', 'Number') - - def test_copy_names(self): - # Check that adding a "names" parameter to the copy is honored - # GH14302 - multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) - multi_idx1 = multi_idx.copy() - - assert multi_idx.equals(multi_idx1) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx1.names == ['MyName1', 'MyName2'] - - multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) - - assert multi_idx.equals(multi_idx2) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx2.names == ['NewName1', 'NewName2'] - - multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) - - assert multi_idx.equals(multi_idx3) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx3.names == ['NewName1', 'NewName2'] - - def test_names(self): - - # names are assigned in setup - names = self.index_names - level_names = [level.name for level in self.index.levels] - assert names == level_names - - # setting bad names on existing - index = self.index - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", - list(index.names) + ["third"]) - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", []) - - # initializing with bad names (should always be equivalent) - major_axis, minor_axis = self.index.levels - major_labels, minor_labels = self.index.labels - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) - - # names are assigned - index.names = ["a", "b"] - ind_names = list(index.names) - level_names = [level.name for level in index.levels] - assert ind_names == level_names - - def test_astype(self): - expected = self.index.copy() - actual = self.index.astype('O') - assert_copy(actual.levels, expected.levels) - assert_copy(actual.labels, expected.labels) - self.check_level_names(actual, expected.names) - - with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): - self.index.astype(np.dtype(int)) - - @pytest.mark.parametrize('ordered', [True, False]) - def test_astype_category(self, ordered): - # GH 18630 - msg = '> 1 ndim Categorical are not supported at this time' - with tm.assert_raises_regex(NotImplementedError, msg): - self.index.astype(CategoricalDtype(ordered=ordered)) - - if ordered is False: - # dtype='category' defaults to ordered=False, so only test once - with tm.assert_raises_regex(NotImplementedError, msg): - self.index.astype('category') - - def 
test_constructor_single_level(self): - result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(result, MultiIndex) - expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') - tm.assert_index_equal(result.levels[0], expected) - assert result.names == ['first'] - - def test_constructor_no_levels(self): - tm.assert_raises_regex(ValueError, "non-zero number " - "of levels/labels", - MultiIndex, levels=[], labels=[]) - both_re = re.compile('Must pass both levels and labels') - with tm.assert_raises_regex(TypeError, both_re): - MultiIndex(levels=[]) - with tm.assert_raises_regex(TypeError, both_re): - MultiIndex(labels=[]) - - def test_constructor_mismatched_label_levels(self): - labels = [np.array([1]), np.array([2]), np.array([3])] - levels = ["a"] - tm.assert_raises_regex(ValueError, "Length of levels and labels " - "must be the same", MultiIndex, - levels=levels, labels=labels) - length_error = re.compile('>= length of level') - label_error = re.compile(r'Unequal label lengths: \[4, 2\]') - - # important to check that it's looking at the right thing. - with tm.assert_raises_regex(ValueError, length_error): - MultiIndex(levels=[['a'], ['b']], - labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - - with tm.assert_raises_regex(ValueError, label_error): - MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) - - # external API - with tm.assert_raises_regex(ValueError, length_error): - self.index.copy().set_levels([['a'], ['b']]) - - with tm.assert_raises_regex(ValueError, label_error): - self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - - def test_constructor_nonhashable_names(self): - # GH 20527 - levels = [[1, 2], [u'one', u'two']] - labels = [[0, 0, 1, 1], [0, 1, 0, 1]] - names = ((['foo'], ['bar'])) - message = "MultiIndex.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, - MultiIndex, levels=levels, - labels=labels, names=names) - - # With .rename() - mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=('foo', 'bar')) - renamed = [['foor'], ['barr']] - tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) - # With .set_names() - tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) - - @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], - [1, 'a', 1]]) - def test_duplicate_level_names(self, names): - # GH18872, GH19029 - mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names) - assert mi.names == names - - # With .rename() - mi = pd.MultiIndex.from_product([[0, 1]] * 3) - mi = mi.rename(names) - assert mi.names == names - - # With .rename(., level=) - mi.rename(names[1], level=1, inplace=True) - mi = mi.rename([names[0], names[2]], level=[0, 2]) - assert mi.names == names - - def test_duplicate_level_names_access_raises(self): - self.index.names = ['foo', 'foo'] - tm.assert_raises_regex(KeyError, 'Level foo not found', - self.index._get_level_number, 'foo') - - def assert_multiindex_copied(self, copy, original): - # Levels should be (at least, shallow copied) - tm.assert_copy(copy.levels, original.levels) - tm.assert_almost_equal(copy.labels, original.labels) - - # Labels doesn't matter which way copied - tm.assert_almost_equal(copy.labels, original.labels) - assert copy.labels is not original.labels - - # Names doesn't matter which way copied - assert copy.names == original.names - assert copy.names is not original.names - - # Sort order should be copied - assert copy.sortorder == original.sortorder - 
- def test_copy(self): - i_copy = self.index.copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_shallow_copy(self): - i_copy = self.index._shallow_copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_view(self): - i_view = self.index.view() - - self.assert_multiindex_copied(i_view, self.index) - - def check_level_names(self, index, names): - assert [level.name for level in index.levels] == list(names) - - def test_changing_names(self): - - # names should be applied to levels - level_names = [level.name for level in self.index.levels] - self.check_level_names(self.index, self.index.names) - - view = self.index.view() - copy = self.index.copy() - shallow_copy = self.index._shallow_copy() - - # changing names should change level names on object - new_names = [name + "a" for name in self.index.names] - self.index.names = new_names - self.check_level_names(self.index, new_names) - - # but not on copies - self.check_level_names(view, level_names) - self.check_level_names(copy, level_names) - self.check_level_names(shallow_copy, level_names) - - # and copies shouldn't change original - shallow_copy.names = [name + "c" for name in shallow_copy.names] - self.check_level_names(self.index, new_names) - - def test_get_level_number_integer(self): - self.index.names = [1, 0] - assert self.index._get_level_number(1) == 0 - assert self.index._get_level_number(0) == 1 - pytest.raises(IndexError, self.index._get_level_number, 2) - tm.assert_raises_regex(KeyError, 'Level fourth not found', - self.index._get_level_number, 'fourth') - - def test_from_arrays(self): - arrays = [] - for lev, lab in zip(self.index.levels, self.index.labels): - arrays.append(np.asarray(lev).take(lab)) - - # list of arrays as input - result = MultiIndex.from_arrays(arrays, names=self.index.names) - tm.assert_index_equal(result, self.index) - - # infer correctly - result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], - ['a', 'b']]) - assert result.levels[0].equals(Index([Timestamp('20130101')])) - assert result.levels[1].equals(Index(['a', 'b'])) - - def test_from_arrays_iterator(self): - # GH 18434 - arrays = [] - for lev, lab in zip(self.index.levels, self.index.labels): - arrays.append(np.asarray(lev).take(lab)) - - # iterator as input - result = MultiIndex.from_arrays(iter(arrays), names=self.index.names) - tm.assert_index_equal(result, self.index) - - # invalid iterator input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of array-likes."): - MultiIndex.from_arrays(0) - - def test_from_arrays_index_series_datetimetz(self): - idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3, - tz='Asia/Tokyo') - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_timedelta(self): - idx1 = pd.timedelta_range('1 days', freq='D', periods=3) - idx2 = pd.timedelta_range('2 hours', freq='H', periods=3) - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = 
pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_period(self): - idx1 = pd.period_range('2011-01-01', freq='D', periods=3) - idx2 = pd.period_range('2015-01-01', freq='H', periods=3) - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_datetimelike_mixed(self): - idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3) - idx3 = pd.timedelta_range('1 days', freq='D', periods=3) - idx4 = pd.period_range('2011-01-01', freq='D', periods=3) - - result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - tm.assert_index_equal(result.get_level_values(2), idx3) - tm.assert_index_equal(result.get_level_values(3), idx4) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), - pd.Series(idx2), - pd.Series(idx3), - pd.Series(idx4)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - tm.assert_index_equal(result2.get_level_values(2), idx3) - tm.assert_index_equal(result2.get_level_values(3), idx4) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_categorical(self): - # GH13743 - idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=False) - idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=True) - - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) - tm.assert_index_equal(result3.get_level_values(0), idx1) - tm.assert_index_equal(result3.get_level_values(1), idx2) - - def test_from_arrays_empty(self): - # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): - MultiIndex.from_arrays(arrays=[]) - - # 1 level - result = MultiIndex.from_arrays(arrays=[[]], names=['A']) - assert isinstance(result, MultiIndex) - expected = Index([], name='A') - tm.assert_index_equal(result.levels[0], expected) - - # N levels - for N in [2, 3]: - arrays = [[]] * N - names = list('ABC')[:N] - result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, - names=names) - tm.assert_index_equal(result, expected) - - def test_from_arrays_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - for i in invalid_inputs: - pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) - - def test_from_arrays_different_lengths(self): - # see gh-13599 - idx1 = [1, 2, 3] - idx2 = 
['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [] - idx2 = ['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [1, 2, 3] - idx2 = [] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - def test_from_product(self): - - first = ['foo', 'bar', 'buz'] - second = ['a', 'b', 'c'] - names = ['first', 'second'] - result = MultiIndex.from_product([first, second], names=names) - - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), - ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), - ('buz', 'c')] - expected = MultiIndex.from_tuples(tuples, names=names) - - tm.assert_index_equal(result, expected) - - def test_from_product_iterator(self): - # GH 18434 - first = ['foo', 'bar', 'buz'] - second = ['a', 'b', 'c'] - names = ['first', 'second'] - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), - ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), - ('buz', 'c')] - expected = MultiIndex.from_tuples(tuples, names=names) - - # iterator as input - result = MultiIndex.from_product(iter([first, second]), names=names) - tm.assert_index_equal(result, expected) - - # Invalid non-iterable input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of iterables."): - MultiIndex.from_product(0) - - def test_from_product_empty(self): - # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): - MultiIndex.from_product([]) - - # 1 level - result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Index([], name='A') - tm.assert_index_equal(result.levels[0], expected) - - # 2 levels - l1 = [[], ['foo', 'bar', 'baz'], []] - l2 = [[], [], ['a', 'b', 'c']] - names = ['A', 'B'] - for first, second in zip(l1, l2): - result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[first, second], - labels=[[], []], names=names) - tm.assert_index_equal(result, expected) - - # GH12258 - names = ['A', 'B', 'C'] - for N in range(4): - lvl2 = lrange(N) - result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[[], lvl2, []], - labels=[[], [], []], names=names) - tm.assert_index_equal(result, expected) - - def test_from_product_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - for i in invalid_inputs: - pytest.raises(TypeError, MultiIndex.from_product, iterables=i) - - def test_from_product_datetimeindex(self): - dt_index = date_range('2000-01-01', periods=2) - mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( - '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( - '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) - tm.assert_numpy_array_equal(mi.values, etalon) - - def test_from_product_index_series_categorical(self): - # GH13743 - first = ['foo', 'bar'] - for ordered in [False, True]: - idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=ordered) - expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), - categories=list("bac"), - ordered=ordered) - - for arr in [idx, pd.Series(idx), idx.values]: - result = pd.MultiIndex.from_product([first, arr]) - tm.assert_index_equal(result.get_level_values(1), expected) - - def 
test_values_boxed(self): - tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), - (3, pd.Timestamp('2000-01-03')), - (1, pd.Timestamp('2000-01-04')), - (2, pd.Timestamp('2000-01-02')), - (3, pd.Timestamp('2000-01-03'))] - result = pd.MultiIndex.from_tuples(tuples) - expected = construct_1d_object_array_from_listlike(tuples) - tm.assert_numpy_array_equal(result.values, expected) - # Check that code branches for boxed values produce identical results - tm.assert_numpy_array_equal(result.values[:4], result[:4].values) - - def test_values_multiindex_datetimeindex(self): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(10 ** 18, 10 ** 18 + 5) - naive = pd.DatetimeIndex(ints) - aware = pd.DatetimeIndex(ints, tz='US/Central') - - idx = pd.MultiIndex.from_arrays([naive, aware]) - result = idx.values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive[:2]) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware[:2]) - - def test_values_multiindex_periodindex(self): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(2007, 2012) - pidx = pd.PeriodIndex(ints, freq='D') - - idx = pd.MultiIndex.from_arrays([ints, pidx]) - result = idx.values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints)) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx[:2]) - - def test_append(self): - result = self.index[:3].append(self.index[3:]) - assert result.equals(self.index) - - foos = [self.index[:1], self.index[1:3], self.index[3:]] - result = foos[0].append(foos[1:]) - assert result.equals(self.index) - - # empty - result = self.index.append([]) - assert result.equals(self.index) - - def test_append_mixed_dtypes(self): - # GH 13660 - dti = date_range('2011-01-01', freq='M', periods=3, ) - dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') - pi = period_range('2011-01', freq='M', periods=3) - - mi = MultiIndex.from_arrays([[1, 2, 3], - [1.1, np.nan, 3.3], - ['a', 'b', 'c'], - dti, dti_tz, pi]) - assert mi.nlevels == 6 - - res = mi.append(mi) - exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], - [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], - ['a', 'b', 'c', 'a', 'b', 'c'], - dti.append(dti), - dti_tz.append(dti_tz), - pi.append(pi)]) - tm.assert_index_equal(res, exp) - - other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z']]) - - res = mi.append(other) - exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], - [1.1, np.nan, 3.3, 'x', 'y', 'z'], - ['a', 'b', 'c', 'x', 'y', 'z'], - dti.append(pd.Index(['x', 'y', 'z'])), - dti_tz.append(pd.Index(['x', 'y', 'z'])), - pi.append(pd.Index(['x', 'y', 'z']))]) - tm.assert_index_equal(res, exp) - - def test_get_level_values(self): - result = self.index.get_level_values(0) - expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], - name='first') - tm.assert_index_equal(result, expected) - assert 
result.name == 'first' - - result = self.index.get_level_values('first') - expected = self.index.get_level_values(0) - tm.assert_index_equal(result, expected) - - # GH 10460 - index = MultiIndex( - levels=[CategoricalIndex(['A', 'B']), - CategoricalIndex([1, 2, 3])], - labels=[np.array([0, 0, 0, 1, 1, 1]), - np.array([0, 1, 2, 0, 1, 2])]) - - exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) - tm.assert_index_equal(index.get_level_values(0), exp) - exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) - tm.assert_index_equal(index.get_level_values(1), exp) - - def test_get_level_values_int_with_na(self): - # GH 17924 - arrays = [['a', 'b', 'b'], [1, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = Index([1, np.nan, 2]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = Index([np.nan, np.nan, 2]) - tm.assert_index_equal(result, expected) - - def test_get_level_values_na(self): - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan]) - tm.assert_index_equal(result, expected) - - result = index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = pd.DatetimeIndex([0, 1, pd.NaT]) - tm.assert_index_equal(result, expected) - - arrays = [[], []] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([], dtype=object) - tm.assert_index_equal(result, expected) - - def test_get_level_values_all_na(self): - # GH 17924 when level entirely consists of nan - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) - tm.assert_index_equal(result, expected) - - result = index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1], dtype=object) - tm.assert_index_equal(result, expected) - - def test_reorder_levels(self): - # this blows up - tm.assert_raises_regex(IndexError, '^Too many levels', - self.index.reorder_levels, [2, 1, 0]) - - def test_nlevels(self): - assert self.index.nlevels == 2 - - def test_iter(self): - result = list(self.index) - expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - assert result == expected - - def test_legacy_pickle(self, datapath): - if PY3: - pytest.skip("testing for legacy pickles not " - "support on py3") - - path = datapath('indexes', 'data', 'multiindex_v1.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - assert obj.equals(obj2) - - res = obj.get_indexer(obj) - exp = np.arange(len(obj), dtype=np.intp) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def test_legacy_v2_unpickle(self, datapath): - - # 0.7.3 -> 0.8.0 format manage - path = datapath('indexes', 'data', 'mindex_073.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - assert obj.equals(obj2) - - res = 
obj.get_indexer(obj) - exp = np.arange(len(obj), dtype=np.intp) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = MultiIndex.from_product( - [[1, 2], ['a', 'b'], date_range('20130101', periods=3, - tz='US/Eastern') - ], names=['one', 'two', 'three']) - unpickled = tm.round_trip_pickle(index) - assert index.equal_levels(unpickled) - - def test_from_tuples_index_values(self): - result = MultiIndex.from_tuples(self.index) - assert (result.values == self.index.values).all() - - def test_contains(self): - assert ('foo', 'two') in self.index - assert ('bar', 'two') not in self.index - assert None not in self.index - - def test_contains_top_level(self): - midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) - assert 'A' in midx - assert 'A' not in midx._engine - - def test_contains_with_nat(self): - # MI with a NaT - mi = MultiIndex(levels=[['C'], - pd.date_range('2012-01-01', periods=5)], - labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], - names=[None, 'B']) - assert ('C', pd.Timestamp('2012-01-01')) in mi - for val in mi.values: - assert val in mi - - def test_is_all_dates(self): - assert not self.index.is_all_dates - - def test_is_numeric(self): - # MultiIndex is never numeric - assert not self.index.is_numeric() - - def test_getitem(self): - # scalar - assert self.index[2] == ('bar', 'one') - - # slice - result = self.index[2:5] - expected = self.index[[2, 3, 4]] - assert result.equals(expected) - - # boolean - result = self.index[[True, False, True, False, True, True]] - result2 = self.index[np.array([True, False, True, False, True, True])] - expected = self.index[[0, 2, 4, 5]] - assert result.equals(expected) - assert result2.equals(expected) - - def test_getitem_group_select(self): - sorted_idx, _ = self.index.sortlevel(0) - assert sorted_idx.get_loc('baz') == slice(3, 4) - assert sorted_idx.get_loc('foo') == slice(0, 2) - - def test_get_loc(self): - assert self.index.get_loc(('foo', 'two')) == 1 - assert self.index.get_loc(('baz', 'two')) == 3 - pytest.raises(KeyError, self.index.get_loc, ('bar', 'two')) - pytest.raises(KeyError, self.index.get_loc, 'quux') - - pytest.raises(NotImplementedError, self.index.get_loc, 'foo', - method='nearest') - - # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - pytest.raises(KeyError, index.get_loc, (1, 1)) - assert index.get_loc((2, 0)) == slice(3, 5) - - def test_get_loc_duplicates(self): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - assert result == expected - # pytest.raises(Exception, index.get_loc, 2) - - index = Index(['c', 'a', 'a', 'b', 'b']) - rs = index.get_loc('c') - xp = 0 - assert rs == xp - - def test_get_value_duplicates(self): - index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - - assert index.get_loc('D') == slice(0, 3) - with pytest.raises(KeyError): - index._engine.get_value(np.array([]), 'D') - - def test_get_loc_level(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 
3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = index[expected].droplevel(0).droplevel(0) - assert loc == expected - assert new_index.equals(exp_index) - - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - assert loc == expected - assert new_index is None - - pytest.raises(KeyError, index.get_loc_level, (2, 2)) - - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( - [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) - result, new_index = index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - assert result == expected - assert new_index.equals(index.droplevel(0)) - - @pytest.mark.parametrize('level', [0, 1]) - @pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) - def test_get_loc_nan(self, level, null_val): - # GH 18485 : NaN in MultiIndex - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - levels[level] = np.array([0, null_val], dtype=type(null_val)) - key[level] = null_val - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - def test_get_loc_missing_nan(self): - # GH 8569 - idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) - assert isinstance(idx.get_loc(1), slice) - pytest.raises(KeyError, idx.get_loc, 3) - pytest.raises(KeyError, idx.get_loc, np.nan) - pytest.raises(KeyError, idx.get_loc, [np.nan]) - - @pytest.mark.parametrize('dtype1', [int, float, bool, str]) - @pytest.mark.parametrize('dtype2', [int, float, bool, str]) - def test_get_loc_multiple_dtypes(self, dtype1, dtype2): - # GH 18520 - levels = [np.array([0, 1]).astype(dtype1), - np.array([0, 1]).astype(dtype2)] - idx = pd.MultiIndex.from_product(levels) - assert idx.get_loc(idx[2]) == 2 - - @pytest.mark.parametrize('level', [0, 1]) - @pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) - def test_get_loc_implicit_cast(self, level, dtypes): - # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - lev_dtype, key_dtype = dtypes - levels[level] = np.array([0, 1], dtype=lev_dtype) - key[level] = key_dtype(1) - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - def test_get_loc_cast_bool(self): - # GH 19086 : int is casted to bool, but not vice-versa - levels = [[False, True], np.arange(2, dtype='int64')] - idx = MultiIndex.from_product(levels) - - assert idx.get_loc((0, 1)) == 1 - assert idx.get_loc((1, 0)) == 2 - - pytest.raises(KeyError, idx.get_loc, (False, True)) - pytest.raises(KeyError, idx.get_loc, (True, False)) - - def test_slice_locs(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - - slob = slice(*idx.slice_locs(df.index[5], df.index[15])) - sliced = stacked[slob] - expected = df[5:16].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30), - df.index[15] - timedelta(seconds=30))) - sliced = stacked[slob] - expected = df[6:15].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - def test_slice_locs_with_type_mismatch(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, (1, 3)) - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, - df.index[5] + timedelta( - seconds=30), (5, 2)) - df = tm.makeCustomDataframe(5, 5) - stacked = df.stack() - idx = 
stacked.index - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): - idx.slice_locs(timedelta(seconds=30)) - # TODO: Try creating a UnicodeDecodeError in exception message - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): - idx.slice_locs(df.index[1], (16, "a")) - - def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " - "MultiIndex lexsort depth", - index.slice_locs, (1, 0, 1), (2, 1, 0)) - - # works - sorted_index, _ = index.sortlevel(0) - # should there be a test case here??? - sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) - - def test_slice_locs_partial(self): - sorted_idx, _ = self.index.sortlevel(0) - - result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) - assert result == (1, 5) - - result = sorted_idx.slice_locs(None, ('qux', 'one')) - assert result == (0, 5) - - result = sorted_idx.slice_locs(('foo', 'two'), None) - assert result == (1, len(sorted_idx)) - - result = sorted_idx.slice_locs('bar', 'baz') - assert result == (2, 4) - - def test_slice_locs_not_contained(self): - # some searchsorted action - - index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], - labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], - [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) - - result = index.slice_locs((1, 0), (5, 2)) - assert result == (3, 6) - - result = index.slice_locs(1, 5) - assert result == (3, 6) - - result = index.slice_locs((2, 2), (5, 2)) - assert result == (3, 6) - - result = index.slice_locs(2, 5) - assert result == (3, 6) - - result = index.slice_locs((1, 0), (6, 3)) - assert result == (3, 8) - - result = index.slice_locs(-1, 10) - assert result == (0, len(index)) - - def test_consistency(self): - # need to construct an overflow - major_axis = lrange(70000) - minor_axis = lrange(10) - - major_labels = np.arange(70000) - minor_labels = np.repeat(lrange(10), 7000) - - # the fact that is works means it's consistent - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - # inconsistent - major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - assert not index.is_unique - - def test_truncate(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - result = index.truncate(before=1) - assert 'foo' not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(after=1) - assert 2 not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(before=1, after=2) - assert len(result.levels[0]) == 2 - - # after < before - pytest.raises(ValueError, index.truncate, 3, 1) - - def test_get_indexer(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, 
np.array([1, 3, -1], dtype=np.intp)) - - r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) - - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2.values) - rexp1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, rexp1) - - r1 = idx1.get_indexer([1, 2, 3]) - assert (r1 == [-1, -1, -1]).all() - - # create index with duplicates - idx1 = Index(lrange(10) + lrange(10)) - idx2 = Index(lrange(20)) - - msg = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, msg): - idx1.get_indexer(idx2) - - def test_get_indexer_nearest(self): - midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='nearest') - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='pad', tolerance=2) - - def test_get_indexer_categorical_time(self): - # https://github.com/pandas-dev/pandas/issues/21390 - midx = MultiIndex.from_product( - [Categorical(['a', 'b', 'c']), - Categorical(date_range("2012-01-01", periods=3, freq='H'))]) - result = midx.get_indexer(midx) - tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) - - def test_hash_collisions(self): - # non-smoke test that we don't get hash collisions - - index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], - names=['one', 'two']) - result = index.get_indexer(index.values) - tm.assert_numpy_array_equal(result, np.arange( - len(index), dtype='intp')) - - for i in [0, 1, len(index) - 2, len(index) - 1]: - result = index.get_loc(index[i]) - assert result == i - - def test_format(self): - self.index.format() - self.index[:0].format() - - def test_format_integer_names(self): - index = MultiIndex(levels=[[0, 1], [0, 1]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) - index.format(names=True) - - def test_format_sparse_display(self): - index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], - labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], - [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) - - result = index.format() - assert result[3] == '1 0 0 0' - - def test_format_sparse_config(self): - warn_filters = warnings.filters - warnings.filterwarnings('ignore', category=FutureWarning, - module=".*format") - # GH1538 - pd.set_option('display.multi_sparse', False) - - result = self.index.format() - assert result[1] == 'foo two' - - tm.reset_display_options() - - warnings.filters = warn_filters - - def test_to_frame(self): - tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] - - index = MultiIndex.from_tuples(tuples) - result = index.to_frame(index=False) - expected = DataFrame(tuples) - tm.assert_frame_equal(result, expected) - - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] - index = MultiIndex.from_tuples(tuples, names=['first', 'second']) - result = index.to_frame(index=False) - expected = DataFrame(tuples) - expected.columns = ['first', 'second'] - 
tm.assert_frame_equal(result, expected) - - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) - result = index.to_frame(index=False) - expected = DataFrame( - {0: np.repeat(np.arange(5, dtype='int64'), 3), - 1: np.tile(pd.date_range('20130101', periods=3), 5)}) - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - def test_to_hierarchical(self): - # GH21613 - index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two')]) - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(3) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - # K > 1 - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(3, 2) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - # non-sorted - index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), - (2, 'a'), (2, 'b')], - names=['N1', 'N2']) - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(2) - expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), - (1, 'b'), - (2, 'a'), (2, 'a'), - (2, 'b'), (2, 'b')], - names=['N1', 'N2']) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - def test_bounds(self): - self.index._bounds - - def test_equals_multi(self): - assert self.index.equals(self.index) - assert not self.index.equals(self.index.values) - assert self.index.equals(Index(self.index.values)) - - assert self.index.equal_levels(self.index) - assert not self.index.equals(self.index[:-1]) - assert not self.index.equals(self.index[-1]) - - # different number of levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - assert not index.equals(index2) - assert not index.equal_levels(index2) - - # levels are different - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not self.index.equals(index) - assert not self.index.equal_levels(index) - - # some of the labels are different - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not self.index.equals(index) - - def test_equals_missing_values(self): - # make sure take is not using -1 - i = pd.MultiIndex.from_tuples([(0, pd.NaT), - (0, pd.Timestamp('20130101'))]) - result = i[0:1].equals(i[0]) - assert not result - result = i[1:2].equals(i[1]) - assert not result - - def 
test_identical(self): - mi = self.index.copy() - mi2 = self.index.copy() - assert mi.identical(mi2) - - mi = mi.set_names(['new1', 'new2']) - assert mi.equals(mi2) - assert not mi.identical(mi2) - - mi2 = mi2.set_names(['new1', 'new2']) - assert mi.identical(mi2) - - mi3 = Index(mi.tolist(), names=mi.names) - mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) - assert mi.identical(mi3) - assert not mi.identical(mi4) - assert mi.equals(mi4) - - def test_is_(self): - mi = MultiIndex.from_tuples(lzip(range(10), range(10))) - assert mi.is_(mi) - assert mi.is_(mi.view()) - assert mi.is_(mi.view().view().view().view()) - mi2 = mi.view() - # names are metadata, they don't change id - mi2.names = ["A", "B"] - assert mi2.is_(mi) - assert mi.is_(mi2) - - assert mi.is_(mi.set_names(["C", "D"])) - mi2 = mi.view() - mi2.set_names(["E", "F"], inplace=True) - assert mi.is_(mi2) - # levels are inherent properties, they change identity - mi3 = mi2.set_levels([lrange(10), lrange(10)]) - assert not mi3.is_(mi2) - # shouldn't change - assert mi2.is_(mi) - mi4 = mi3.view() - - # GH 17464 - Remove duplicate MultiIndex levels - mi4.set_levels([lrange(10), lrange(10)], inplace=True) - assert not mi4.is_(mi3) - mi5 = mi.view() - mi5.set_levels(mi5.levels, inplace=True) - assert not mi5.is_(mi) - - def test_union(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_union = piece1 | piece2 - - tups = sorted(self.index.values) - expected = MultiIndex.from_tuples(tups) - - assert the_union.equals(expected) - - # corner case, pass self or empty thing: - the_union = self.index.union(self.index) - assert the_union is self.index - - the_union = self.index.union(self.index[:0]) - assert the_union is self.index - - # won't work in python 3 - # tuples = self.index.values - # result = self.index[:4] | tuples[4:] - # assert result.equals(tuples) - - # not valid for python 3 - # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) - - # result = other.union(self.index) - # assert ('foo', 'one') in result - # assert 'B' in result - - # result2 = self.index.union(other) - # assert result.equals(result2) - - def test_intersection(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_int = piece1 & piece2 - tups = sorted(self.index[3:5].values) - expected = MultiIndex.from_tuples(tups) - assert the_int.equals(expected) - - # corner case, pass self - the_int = self.index.intersection(self.index) - assert the_int is self.index - - # empty intersection: disjoint - empty = self.index[:2] & self.index[2:] - expected = self.index[:0] - assert empty.equals(expected) - - # can't do in python 3 - # tuples = self.index.values - # result = self.index & tuples - # assert result.equals(tuples) - - def test_sub(self): - - first = self.index - - # - now raises (previously was set op difference) - with pytest.raises(TypeError): - first - self.index[-3:] - with pytest.raises(TypeError): - self.index[-3:] - first - with pytest.raises(TypeError): - self.index[-3:] - first.tolist() - with pytest.raises(TypeError): - first.tolist() - self.index[-3:] - - def test_difference(self): - - first = self.index - result = first.difference(self.index[-3:]) - expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), - sortorder=0, - names=self.index.names) - - assert isinstance(result, MultiIndex) - assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: reflexive - result = self.index.difference(self.index) - expected = self.index[:0] - 
assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: superset - result = self.index[-3:].difference(self.index) - expected = self.index[:0] - assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: degenerate - result = self.index[:0].difference(self.index) - expected = self.index[:0] - assert result.equals(expected) - assert result.names == self.index.names - - # names not the same - chunklet = self.index[-3:] - chunklet.names = ['foo', 'baz'] - result = first.difference(chunklet) - assert result.names == (None, None) - - # empty, but non-equal - result = self.index.difference(self.index.sortlevel(1)[0]) - assert len(result) == 0 - - # raise Exception called with non-MultiIndex - result = first.difference(first.values) - assert result.equals(first[:0]) - - # name from empty array - result = first.difference([]) - assert first.equals(result) - assert first.names == result.names - - # name from non-empty array - result = first.difference([('foo', 'one')]) - expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( - 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) - expected.names = first.names - assert first.names == result.names - tm.assert_raises_regex(TypeError, "other must be a MultiIndex " - "or a list of tuples", - first.difference, [1, 2, 3, 4, 5]) - - def test_from_tuples(self): - tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' - 'from empty list', - MultiIndex.from_tuples, []) - - expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], - names=['a', 'b']) - - # input tuples - result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - tm.assert_index_equal(result, expected) - - def test_from_tuples_iterator(self): - # GH 18434 - # input iterator for tuples - expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], - names=['a', 'b']) - - result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) - tm.assert_index_equal(result, expected) - - # input non-iterables - with tm.assert_raises_regex( - TypeError, 'Input must be a list / sequence of tuple-likes.'): - MultiIndex.from_tuples(0) - - def test_from_tuples_empty(self): - # GH 16777 - result = MultiIndex.from_tuples([], names=['a', 'b']) - expected = MultiIndex.from_arrays(arrays=[[], []], - names=['a', 'b']) - tm.assert_index_equal(result, expected) - - def test_argsort(self): - result = self.index.argsort() - expected = self.index.values.argsort() - tm.assert_numpy_array_equal(result, expected) - - def test_sortlevel(self): - import random - - tuples = list(self.index) - random.shuffle(tuples) - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - def test_sortlevel_not_sort_remaining(self): - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) - assert sorted_idx.equals(mi) - - def test_sortlevel_deterministic(self): - tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), - ('foo', 'one'), ('baz', 
'two'), ('qux', 'one')] - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - def test_dims(self): - pass - - def test_drop(self): - dropped = self.index.drop([('foo', 'two'), ('qux', 'one')]) - - index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) - dropped2 = self.index.drop(index) - - expected = self.index[[0, 2, 3, 5]] - tm.assert_index_equal(dropped, expected) - tm.assert_index_equal(dropped2, expected) - - dropped = self.index.drop(['bar']) - expected = self.index[[0, 1, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop('foo') - expected = self.index[[2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - index = MultiIndex.from_tuples([('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, [('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, index) - pytest.raises(KeyError, self.index.drop, ['foo', 'two']) - - # partially correct argument - mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, mixed_index) - - # error='ignore' - dropped = self.index.drop(index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop(['foo', 'two'], errors='ignore') - expected = self.index[[2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - # mixed partial / full drop - dropped = self.index.drop(['foo', ('qux', 'one')]) - expected = self.index[[2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - # mixed partial / full drop / error='ignore' - mixed_index = ['foo', ('qux', 'one'), 'two'] - pytest.raises(KeyError, self.index.drop, mixed_index) - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - def test_droplevel_with_names(self): - index = self.index[self.index.get_loc('foo')] - dropped = index.droplevel(0) - assert dropped.name == 'second' - - index = MultiIndex( - levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - dropped = index.droplevel(0) - assert dropped.names == ('two', 'three') - - dropped = index.droplevel('two') - expected = index.droplevel(1) - assert dropped.equals(expected) - - def test_droplevel_list(self): - index = MultiIndex( - levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - - dropped = index[:2].droplevel(['three', 'one']) - expected = index[:2].droplevel(2).droplevel(0) - assert dropped.equals(expected) - - dropped = index[:2].droplevel([]) - expected = index[:2] - assert dropped.equals(expected) - - with pytest.raises(ValueError): 
- index[:2].droplevel(['one', 'two', 'three']) - - with pytest.raises(KeyError): - index[:2].droplevel(['one', 'four']) - - def test_drop_not_lexsorted(self): - # GH 12078 - - # define the lexsorted version of the multi-index - tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] - lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) - assert lexsorted_mi.is_lexsorted() - - # and the not-lexsorted version - df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], - data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]]) - df = df.pivot_table(index='a', columns=['b', 'c'], values='d') - df = df.reset_index() - not_lexsorted_mi = df.columns - assert not not_lexsorted_mi.is_lexsorted() - - # compare the results - tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) - with tm.assert_produces_warning(PerformanceWarning): - tm.assert_index_equal(lexsorted_mi.drop('a'), - not_lexsorted_mi.drop('a')) - - def test_insert(self): - # key contained in all levels - new_index = self.index.insert(0, ('bar', 'two')) - assert new_index.equal_levels(self.index) - assert new_index[0] == ('bar', 'two') - - # key not contained in all levels - new_index = self.index.insert(0, ('abc', 'three')) - - exp0 = Index(list(self.index.levels[0]) + ['abc'], name='first') - tm.assert_index_equal(new_index.levels[0], exp0) - - exp1 = Index(list(self.index.levels[1]) + ['three'], name='second') - tm.assert_index_equal(new_index.levels[1], exp1) - assert new_index[0] == ('abc', 'three') - - # key wrong length - msg = "Item must have length equal to number of levels" - with tm.assert_raises_regex(ValueError, msg): - self.index.insert(0, ('foo2',)) - - left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], - columns=['1st', '2nd', '3rd']) - left.set_index(['1st', '2nd'], inplace=True) - ts = left['3rd'].copy(deep=True) - - left.loc[('b', 'x'), '3rd'] = 2 - left.loc[('b', 'a'), '3rd'] = -1 - left.loc[('b', 'b'), '3rd'] = 3 - left.loc[('a', 'x'), '3rd'] = 4 - left.loc[('a', 'w'), '3rd'] = 5 - left.loc[('a', 'a'), '3rd'] = 6 - - ts.loc[('b', 'x')] = 2 - ts.loc['b', 'a'] = -1 - ts.loc[('b', 'b')] = 3 - ts.loc['a', 'x'] = 4 - ts.loc[('a', 'w')] = 5 - ts.loc['a', 'a'] = 6 - - right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], - ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], - ['a', 'w', 5], ['a', 'a', 6]], - columns=['1st', '2nd', '3rd']) - right.set_index(['1st', '2nd'], inplace=True) - # FIXME data types changes to float because - # of intermediate nan insertion; - tm.assert_frame_equal(left, right, check_dtype=False) - tm.assert_series_equal(ts, right['3rd']) - - # GH9250 - idx = [('test1', i) for i in range(5)] + \ - [('test2', i) for i in range(6)] + \ - [('test', 17), ('test', 18)] - - left = pd.Series(np.linspace(0, 10, 11), - pd.MultiIndex.from_tuples(idx[:-2])) - - left.loc[('test', 17)] = 11 - left.loc[('test', 18)] = 12 - - right = pd.Series(np.linspace(0, 12, 13), - pd.MultiIndex.from_tuples(idx)) - - tm.assert_series_equal(left, right) - - def test_take_preserve_name(self): - taken = self.index.take([3, 0, 1]) - assert taken.names == self.index.names - - def test_take_fill_value(self): - # GH 12631 - vals = [['A', 'B'], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] - idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) - - result = idx.take(np.array([1, 0, -1])) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - 
# fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - (np.nan, pd.NaT)] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def take_invalid_kwargs(self): - vals = [['A', 'B'], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] - idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) - indices = [1, 2] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') - - @pytest.mark.parametrize('other', - [Index(['three', 'one', 'two']), - Index(['one']), - Index(['one', 'three'])]) - def test_join_level(self, other, join_type): - join_index, lidx, ridx = other.join(self.index, how=join_type, - level='second', - return_indexers=True) - - exp_level = other.join(self.index.levels[1], how=join_type) - assert join_index.levels[0].equals(self.index.levels[0]) - assert join_index.levels[1].equals(exp_level) - - # pare down levels - mask = np.array( - [x[1] in exp_level for x in self.index], dtype=bool) - exp_values = self.index.values[mask] - tm.assert_numpy_array_equal(join_index.values, exp_values) - - if join_type in ('outer', 'inner'): - join_index2, ridx2, lidx2 = \ - self.index.join(other, how=join_type, level='second', - return_indexers=True) - - assert join_index.equals(join_index2) - tm.assert_numpy_array_equal(lidx, lidx2) - tm.assert_numpy_array_equal(ridx, ridx2) - tm.assert_numpy_array_equal(join_index2.values, exp_values) - - def test_join_level_corner_case(self): - # some corner cases - idx = Index(['three', 'one', 'two']) - result = idx.join(self.index, level='second') - assert isinstance(result, MultiIndex) - - tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", - self.index.join, self.index, level=1) - - def test_join_self(self, join_type): - res = self.index - joined = res.join(res, how=join_type) - assert res is joined - - def test_join_multi(self): - # GH 10665 - midx = pd.MultiIndex.from_product( - [np.arange(4), np.arange(4)], names=['a', 'b']) - idx = pd.Index([1, 2, 5], name='b') - - # inner - jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) - exp_idx = pd.MultiIndex.from_product( - [np.arange(4), [1, 2]], names=['a', 'b']) - exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) - exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) - tm.assert_index_equal(jidx, exp_idx) - tm.assert_numpy_array_equal(lidx, exp_lidx) - 
tm.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) - tm.assert_index_equal(jidx, exp_idx) - tm.assert_numpy_array_equal(lidx, exp_lidx) - tm.assert_numpy_array_equal(ridx, exp_ridx) - - # keep MultiIndex - jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) - exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, - 1, -1], dtype=np.intp) - tm.assert_index_equal(jidx, midx) - assert lidx is None - tm.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) - tm.assert_index_equal(jidx, midx) - assert lidx is None - tm.assert_numpy_array_equal(ridx, exp_ridx) - - def test_reindex(self): - result, indexer = self.index.reindex(list(self.index[:4])) - assert isinstance(result, MultiIndex) - self.check_level_names(result, self.index[:4].names) - - result, indexer = self.index.reindex(list(self.index)) - assert isinstance(result, MultiIndex) - assert indexer is None - self.check_level_names(result, self.index.names) - - def test_reindex_level(self): - idx = Index(['one']) - - target, indexer = self.index.reindex(idx, level='second') - target2, indexer2 = idx.reindex(self.index, level='second') - - exp_index = self.index.join(idx, level='second', how='right') - exp_index2 = self.index.join(idx, level='second', how='left') - - assert target.equals(exp_index) - exp_indexer = np.array([0, 2, 4]) - tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) - - assert target2.equals(exp_index2) - exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) - tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - - tm.assert_raises_regex(TypeError, "Fill method not supported", - self.index.reindex, self.index, - method='pad', level='second') - - tm.assert_raises_regex(TypeError, "Fill method not supported", - idx.reindex, idx, method='bfill', - level='first') - - def test_duplicates(self): - assert not self.index.has_duplicates - assert self.index.append(self.index).has_duplicates - - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ - [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - assert index.has_duplicates - - # GH 9075 - t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), - (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), - (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), - (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), - (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), - (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), - (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), - (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), - (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), - (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), - (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), - (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), - (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), - (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), - (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), - (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), - (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), - (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] - - index = pd.MultiIndex.from_tuples(t) - assert not index.has_duplicates - - # handle int64 overflow if possible - def check(nlevels, with_nulls): - labels = 
np.tile(np.arange(500), 2) - level = np.arange(500) - - if with_nulls: # inject some null values - labels[500] = -1 # common nan value - labels = [labels.copy() for i in range(nlevels)] - for i in range(nlevels): - labels[i][500 + i - nlevels // 2] = -1 - - labels += [np.array([-1, 1]).repeat(500)] - else: - labels = [labels] * nlevels + [np.arange(2).repeat(500)] - - levels = [level] * nlevels + [[0, 1]] - - # no dups - index = MultiIndex(levels=levels, labels=labels) - assert not index.has_duplicates - - # with a dup - if with_nulls: - def f(a): - return np.insert(a, 1000, a[0]) - labels = list(map(f, labels)) - index = MultiIndex(levels=levels, labels=labels) - else: - values = index.values.tolist() - index = MultiIndex.from_tuples(values + [values[0]]) - - assert index.has_duplicates - - # no overflow - check(4, False) - check(4, True) - - # overflow possible - check(8, False) - check(8, True) - - # GH 9125 - n, k = 200, 5000 - levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - labels = [np.random.choice(n, k * n) for lev in levels] - mi = MultiIndex(levels=levels, labels=labels) - - for keep in ['first', 'last', False]: - left = mi.duplicated(keep=keep) - right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) - tm.assert_numpy_array_equal(left, right) - - # GH5873 - for a in [101, 102]: - mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - 2, dtype='bool')) - - for n in range(1, 6): # 1st level shape - for m in range(1, 5): # 2nd level shape - # all possible unique combinations, including nan - lab = product(range(-1, n), range(-1, m)) - mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], - labels=np.random.permutation(list(lab)).T) - assert len(mi) == (n + 1) * (m + 1) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - len(mi), dtype='bool')) - - def test_duplicate_meta_data(self): - # GH 10115 - index = MultiIndex( - levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) - - for idx in [index, - index.set_names([None, None]), - index.set_names([None, 'Num']), - index.set_names(['Upper', 'Num']), ]: - assert idx.has_duplicates - assert idx.drop_duplicates().names == idx.names - - def test_get_unique_index(self): - idx = self.index[[0, 1, 0, 1, 1, 0, 0]] - expected = self.index._shallow_copy(idx[[0, 1]]) - - for dropna in [False, True]: - result = idx._get_unique_index(dropna=dropna) - assert result.unique - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('names', [None, ['first', 'second']]) - def test_unique(self, names): - mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], - names=names) - - res = mi.unique() - exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], - names=names) - res = mi.unique() - exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], - names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], - names=names) - res = mi.unique() - exp = 
pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) - tm.assert_index_equal(res, exp) - - # GH #20568 - empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=names) - res = mi.unique() - tm.assert_index_equal(mi, res) - - @pytest.mark.parametrize('level', [0, 'first', 1, 'second']) - def test_unique_level(self, level): - # GH #17896 - with level= argument - result = self.index.unique(level=level) - expected = self.index.get_level_values(level).unique() - tm.assert_index_equal(result, expected) - - # With already unique level - mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], - names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - tm.assert_index_equal(result, expected) - - # With empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - - def test_unique_datetimelike(self): - idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', - '2015-01-01', 'NaT', 'NaT']) - idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', - '2015-01-02', 'NaT', '2015-01-01'], - tz='Asia/Tokyo') - result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() - - eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) - eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', - 'NaT', '2015-01-01'], - tz='Asia/Tokyo') - exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) - tm.assert_index_equal(result, exp) - - def test_tolist(self): - result = self.index.tolist() - exp = list(self.index.values) - assert result == exp - - def test_repr_with_unicode_data(self): - with pd.core.config.option_context("display.encoding", 'UTF-8'): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - index = pd.DataFrame(d).set_index(["a", "b"]).index - assert "\\u" not in repr(index) # we don't want unicode-escaped - - def test_repr_roundtrip(self): - - mi = MultiIndex.from_product([list('ab'), range(3)], - names=['first', 'second']) - str(mi) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - assert mi.get_level_values('first').inferred_type == 'string' - assert result.get_level_values('first').inferred_type == 'unicode' - - mi_u = MultiIndex.from_product( - [list(u'ab'), range(3)], names=['first', 'second']) - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - # formatting - if PY3: - str(mi) - else: - compat.text_type(mi) - - # long format - mi = MultiIndex.from_product([list('abcdefg'), range(10)], - names=['first', 'second']) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - assert mi.get_level_values('first').inferred_type == 'string' - assert result.get_level_values('first').inferred_type == 'unicode' - - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - def test_str(self): - # tested elsewhere - pass - - def test_unicode_string_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - str(idx) - else: - compat.text_type(idx) - - def test_bytestring_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - bytes(idx) - else: 
-        str(idx)
-
-    def test_slice_keep_name(self):
-        x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')],
-                                   names=['x', 'y'])
-        assert x[1:].names == x.names
-
-    def test_isna_behavior(self):
-        # should not segfault GH5123
-        # NOTE: if MI representation changes, may make sense to allow
-        # isna(MI)
-        with pytest.raises(NotImplementedError):
-            pd.isna(self.index)
-
-    def test_level_setting_resets_attributes(self):
-        ind = pd.MultiIndex.from_arrays([
-            ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
-        ])
-        assert ind.is_monotonic
-        ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
-        # if this fails, probably didn't reset the cache correctly.
-        assert not ind.is_monotonic
-
-    def test_is_monotonic_increasing(self):
-        i = MultiIndex.from_product([np.arange(10),
-                                     np.arange(10)], names=['one', 'two'])
-        assert i.is_monotonic
-        assert i._is_strictly_monotonic_increasing
-        assert Index(i.values).is_monotonic
-        assert i._is_strictly_monotonic_increasing
-
-        i = MultiIndex.from_product([np.arange(10, 0, -1),
-                                     np.arange(10)], names=['one', 'two'])
-        assert not i.is_monotonic
-        assert not i._is_strictly_monotonic_increasing
-        assert not Index(i.values).is_monotonic
-        assert not Index(i.values)._is_strictly_monotonic_increasing
-
-        i = MultiIndex.from_product([np.arange(10),
-                                     np.arange(10, 0, -1)],
-                                    names=['one', 'two'])
-        assert not i.is_monotonic
-        assert not i._is_strictly_monotonic_increasing
-        assert not Index(i.values).is_monotonic
-        assert not Index(i.values)._is_strictly_monotonic_increasing
-
-        i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
-        assert not i.is_monotonic
-        assert not i._is_strictly_monotonic_increasing
-        assert not Index(i.values).is_monotonic
-        assert not Index(i.values)._is_strictly_monotonic_increasing
-
-        # string ordering
-        i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
-                               ['one', 'two', 'three']],
-                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                       names=['first', 'second'])
-        assert not i.is_monotonic
-        assert not Index(i.values).is_monotonic
-        assert not i._is_strictly_monotonic_increasing
-        assert not Index(i.values)._is_strictly_monotonic_increasing
-
-        i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
-                               ['mom', 'next', 'zenith']],
-                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                       names=['first', 'second'])
-        assert i.is_monotonic
-        assert Index(i.values).is_monotonic
-        assert i._is_strictly_monotonic_increasing
-        assert Index(i.values)._is_strictly_monotonic_increasing
-
-        # mixed levels, hits the TypeError
-        i = MultiIndex(
-            levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
-                                   'nl0000289783',
-                                   'nl0000289965', 'nl0000301109']],
-            labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
-            names=['household_id', 'asset_id'])
-
-        assert not i.is_monotonic
-        assert not i._is_strictly_monotonic_increasing
-
-        # empty
-        i = MultiIndex.from_arrays([[], []])
-        assert i.is_monotonic
-        assert Index(i.values).is_monotonic
-        assert i._is_strictly_monotonic_increasing
-        assert Index(i.values)._is_strictly_monotonic_increasing
-
-    def test_is_monotonic_decreasing(self):
-        i = MultiIndex.from_product([np.arange(9, -1, -1),
-                                     np.arange(9, -1, -1)],
-                                    names=['one', 'two'])
-        assert i.is_monotonic_decreasing
-        assert i._is_strictly_monotonic_decreasing
-        assert Index(i.values).is_monotonic_decreasing
-        assert i._is_strictly_monotonic_decreasing
-
-        i = MultiIndex.from_product([np.arange(10),
-                                     np.arange(10, 0, -1)],
-                                    names=['one', 'two'])
-        assert not i.is_monotonic_decreasing
-        assert not i._is_strictly_monotonic_decreasing
-        assert not Index(i.values).is_monotonic_decreasing
-        assert not Index(i.values)._is_strictly_monotonic_decreasing
-
-        i = MultiIndex.from_product([np.arange(10, 0, -1),
-                                     np.arange(10)], names=['one', 'two'])
-        assert not i.is_monotonic_decreasing
-        assert not i._is_strictly_monotonic_decreasing
-        assert not Index(i.values).is_monotonic_decreasing
-        assert not Index(i.values)._is_strictly_monotonic_decreasing
-
-        i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
-        assert not i.is_monotonic_decreasing
-        assert not i._is_strictly_monotonic_decreasing
-        assert not Index(i.values).is_monotonic_decreasing
-        assert not Index(i.values)._is_strictly_monotonic_decreasing
-
-        # string ordering
-        i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
-                               ['three', 'two', 'one']],
-                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                       names=['first', 'second'])
-        assert not i.is_monotonic_decreasing
-        assert not Index(i.values).is_monotonic_decreasing
-        assert not i._is_strictly_monotonic_decreasing
-        assert not Index(i.values)._is_strictly_monotonic_decreasing
-
-        i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
-                               ['zenith', 'next', 'mom']],
-                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                       names=['first', 'second'])
-        assert i.is_monotonic_decreasing
-        assert Index(i.values).is_monotonic_decreasing
-        assert i._is_strictly_monotonic_decreasing
-        assert Index(i.values)._is_strictly_monotonic_decreasing
-
-        # mixed levels, hits the TypeError
-        i = MultiIndex(
-            levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
-                                   'nl0000289783', 'lu0197800237',
-                                   'gb00b03mlx29']],
-            labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
-            names=['household_id', 'asset_id'])
-
-        assert not i.is_monotonic_decreasing
-        assert not i._is_strictly_monotonic_decreasing
-
-        # empty
-        i = MultiIndex.from_arrays([[], []])
-        assert i.is_monotonic_decreasing
-        assert Index(i.values).is_monotonic_decreasing
-        assert i._is_strictly_monotonic_decreasing
-        assert Index(i.values)._is_strictly_monotonic_decreasing
-
-    def test_is_strictly_monotonic_increasing(self):
-        idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
-                            labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
-        assert idx.is_monotonic_increasing
-        assert not idx._is_strictly_monotonic_increasing
-
-    def test_is_strictly_monotonic_decreasing(self):
-        idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
-                            labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
-        assert idx.is_monotonic_decreasing
-        assert not idx._is_strictly_monotonic_decreasing
-
-    def test_reconstruct_sort(self):
-
-        # starts off lexsorted & monotonic
-        mi = MultiIndex.from_arrays([
-            ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
-        ])
-        assert mi.is_lexsorted()
-        assert mi.is_monotonic
-
-        recons = mi._sort_levels_monotonic()
-        assert recons.is_lexsorted()
-        assert recons.is_monotonic
-        assert mi is recons
-
-        assert mi.equals(recons)
-        assert Index(mi.values).equals(Index(recons.values))
-
-        # cannot convert to lexsorted
-        mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
-                                        ('x', 'b'), ('y', 'a'), ('z', 'b')],
-                                       names=['one', 'two'])
-        assert not mi.is_lexsorted()
-        assert not mi.is_monotonic
-
-        recons = mi._sort_levels_monotonic()
-        assert not recons.is_lexsorted()
-        assert not recons.is_monotonic
-
-        assert mi.equals(recons)
-        assert Index(mi.values).equals(Index(recons.values))
-
-        # cannot convert to lexsorted
-        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
-                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
-                        names=['col1', 'col2'])
-        assert not mi.is_lexsorted()
-        assert not mi.is_monotonic
-
-        recons = mi._sort_levels_monotonic()
-        assert not recons.is_lexsorted()
-        assert not recons.is_monotonic
-
-        assert mi.equals(recons)
-        assert Index(mi.values).equals(Index(recons.values))
-
-    def test_reconstruct_remove_unused(self):
-        # xref to GH 2770
-        df = DataFrame([['deleteMe', 1, 9],
-                        ['keepMe', 2, 9],
-                        ['keepMeToo', 3, 9]],
-                       columns=['first', 'second', 'third'])
-        df2 = df.set_index(['first', 'second'], drop=False)
-        df2 = df2[df2['first'] != 'deleteMe']
-
-        # removed levels are there
-        expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
-                                      [1, 2, 3]],
-                              labels=[[1, 2], [1, 2]],
-                              names=['first', 'second'])
-        result = df2.index
-        tm.assert_index_equal(result, expected)
-
-        expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
-                                      [2, 3]],
-                              labels=[[0, 1], [0, 1]],
-                              names=['first', 'second'])
-        result = df2.index.remove_unused_levels()
-        tm.assert_index_equal(result, expected)
-
-        # idempotent
-        result2 = result.remove_unused_levels()
-        tm.assert_index_equal(result2, expected)
-        assert result2.is_(result)
-
-    @pytest.mark.parametrize('level0', [['a', 'd', 'b'],
-                                        ['a', 'd', 'b', 'unused']])
-    @pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
-                                        ['w', 'x', 'y', 'z', 'unused']])
-    def test_remove_unused_nan(self, level0, level1):
-        # GH 18417
-        mi = pd.MultiIndex(levels=[level0, level1],
-                           labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
-
-        result = mi.remove_unused_levels()
-        tm.assert_index_equal(result, mi)
-        for level in 0, 1:
-            assert('unused' not in result.levels[level])
-
-    @pytest.mark.parametrize('first_type,second_type', [
-        ('int64', 'int64'),
-        ('datetime64[D]', 'str')])
-    def test_remove_unused_levels_large(self, first_type, second_type):
-        # GH16556
-
-        # because tests should be deterministic (and this test in particular
-        # checks that levels are removed, which is not the case for every
-        # random input):
-        rng = np.random.RandomState(4)  # seed is arbitrary value that works
-
-        size = 1 << 16
-        df = DataFrame(dict(
-            first=rng.randint(0, 1 << 13, size).astype(first_type),
-            second=rng.randint(0, 1 << 10, size).astype(second_type),
-            third=rng.rand(size)))
-        df = df.groupby(['first', 'second']).sum()
-        df = df[df.third < 0.1]
-
-        result = df.index.remove_unused_levels()
-        assert len(result.levels[0]) < len(df.index.levels[0])
-        assert len(result.levels[1]) < len(df.index.levels[1])
-        assert result.equals(df.index)
-
-        expected = df.reset_index().set_index(['first', 'second']).index
-        tm.assert_index_equal(result, expected)
-
-    def test_isin(self):
-        values = [('foo', 2), ('bar', 3), ('quux', 4)]
-
-        idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
-            4)])
-        result = idx.isin(values)
-        expected = np.array([False, False, True, True])
-        tm.assert_numpy_array_equal(result, expected)
-
-        # empty, return dtype bool
-        idx = MultiIndex.from_arrays([[], []])
-        result = idx.isin(values)
-        assert len(result) == 0
-        assert result.dtype == np.bool_
-
-    @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
-    def test_isin_nan_not_pypy(self):
-        idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
-        tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
-                                    np.array([False, False]))
-        tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
-                                    np.array([False, False]))
-
-    @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
-    def test_isin_nan_pypy(self):
-        idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
-        tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
-                                    np.array([False, True]))
-        tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
-                                    np.array([False, True]))
-
-    def test_isin_level_kwarg(self):
-        idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
-            4)])
-
-        vals_0 = ['foo', 'bar', 'quux']
-        vals_1 = [2, 3, 10]
-
-        expected = np.array([False, False, True, True])
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
-
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
-
-        pytest.raises(IndexError, idx.isin, vals_0, level=5)
-        pytest.raises(IndexError, idx.isin, vals_0, level=-5)
-
-        pytest.raises(KeyError, idx.isin, vals_0, level=1.0)
-        pytest.raises(KeyError, idx.isin, vals_1, level=-1.0)
-        pytest.raises(KeyError, idx.isin, vals_1, level='A')
-
-        idx.names = ['A', 'B']
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
-        tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))
-
-        pytest.raises(KeyError, idx.isin, vals_1, level='C')
-
-    def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
-        # GH6552
-        idx = self.index.copy()
-        target = idx.copy()
-        idx.names = target.names = [None, None]
-
-        other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
-
-        # list & ndarray cases
-        assert idx.reindex([])[0].names == [None, None]
-        assert idx.reindex(np.array([]))[0].names == [None, None]
-        assert idx.reindex(target.tolist())[0].names == [None, None]
-        assert idx.reindex(target.values)[0].names == [None, None]
-        assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
-        assert idx.reindex(other_dtype.values)[0].names == [None, None]
-
-        idx.names = ['foo', 'bar']
-        assert idx.reindex([])[0].names == ['foo', 'bar']
-        assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
-        assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
-        assert idx.reindex(target.values)[0].names == ['foo', 'bar']
-        assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
-        assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
-
-    def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self):
-        # GH7774
-        idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
-                                         names=['foo', 'bar'])
-        assert idx.reindex([], level=0)[0].names == ['foo', 'bar']
-        assert idx.reindex([], level=1)[0].names == ['foo', 'bar']
-
-    def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self):
-        # GH7774
-        idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
-        assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
-        assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
-
-    def test_groupby(self):
-        groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2]))
-        labels = self.index.get_values().tolist()
-        exp = {1: labels[:3], 2: labels[3:]}
-        tm.assert_dict_equal(groups, exp)
-
-        # GH5620
-        groups = self.index.groupby(self.index)
-        exp = {key: [key] for key in self.index}
-        tm.assert_dict_equal(groups, exp)
-
-    def test_index_name_retained(self):
-        # GH9857
-        result = pd.DataFrame({'x': [1, 2, 6],
-                               'y': [2, 2, 8],
-                               'z': [-5, 0, 5]})
-        result = result.set_index('z')
-        result.loc[10] = [9, 10]
-        df_expected = pd.DataFrame({'x': [1, 2, 6, 9],
-                                    'y': [2, 2, 8, 10],
-                                    'z': [-5, 0, 5, 10]})
-        df_expected = df_expected.set_index('z')
-        tm.assert_frame_equal(result, df_expected)
-
-    def test_equals_operator(self):
-        # GH9785
-        assert (self.index == self.index).all()
-
-    def test_large_multiindex_error(self):
-        # GH12527
-        df_below_1000000 = pd.DataFrame(
-            1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
-            columns=['dest'])
-        with pytest.raises(KeyError):
-            df_below_1000000.loc[(-1, 0), 'dest']
-        with pytest.raises(KeyError):
-            df_below_1000000.loc[(3, 0), 'dest']
-        df_above_1000000 = pd.DataFrame(
-            1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
-            columns=['dest'])
-        with pytest.raises(KeyError):
-            df_above_1000000.loc[(-1, 0), 'dest']
-        with pytest.raises(KeyError):
-            df_above_1000000.loc[(3, 0), 'dest']
-
-    def test_partial_string_timestamp_multiindex(self):
-        # GH10331
-        dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
-        abc = ['a', 'b', 'c']
-        ix = pd.MultiIndex.from_product([dr, abc])
-        df = pd.DataFrame({'c1': range(0, 15)}, index=ix)
-        idx = pd.IndexSlice
-
-        #                        c1
-        # 2016-01-01 00:00:00 a   0
-        #                     b   1
-        #                     c   2
-        # 2016-01-01 12:00:00 a   3
-        #                     b   4
-        #                     c   5
-        # 2016-01-02 00:00:00 a   6
-        #                     b   7
-        #                     c   8
-        # 2016-01-02 12:00:00 a   9
-        #                     b  10
-        #                     c  11
-        # 2016-01-03 00:00:00 a  12
-        #                     b  13
-        #                     c  14
-
-        # partial string matching on a single index
-        for df_swap in (df.swaplevel(),
-                        df.swaplevel(0),
-                        df.swaplevel(0, 1)):
-            df_swap = df_swap.sort_index()
-            just_a = df_swap.loc['a']
-            result = just_a.loc['2016-01-01']
-            expected = df.loc[idx[:, 'a'], :].iloc[0:2]
-            expected.index = expected.index.droplevel(1)
-            tm.assert_frame_equal(result, expected)
-
-        # indexing with IndexSlice
-        result = df.loc[idx['2016-01-01':'2016-02-01', :], :]
-        expected = df
-        tm.assert_frame_equal(result, expected)
-
-        # match on secondary index
-        result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
-        expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
-        tm.assert_frame_equal(result, expected)
-
-        # Even though this syntax works on a single index, this is somewhat
-        # ambiguous and we don't want to extend this behavior forward to work
-        # in multi-indexes. This would amount to selecting a scalar from a
-        # column.
-        with pytest.raises(KeyError):
-            df['2016-01-01']
-
-        # partial string match on year only
-        result = df.loc['2016']
-        expected = df
-        tm.assert_frame_equal(result, expected)
-
-        # partial string match on date
-        result = df.loc['2016-01-01']
-        expected = df.iloc[0:6]
-        tm.assert_frame_equal(result, expected)
-
-        # partial string match on date and hour, from middle
-        result = df.loc['2016-01-02 12']
-        expected = df.iloc[9:12]
-        tm.assert_frame_equal(result, expected)
-
-        # partial string match on secondary index
-        result = df_swap.loc[idx[:, '2016-01-02'], :]
-        expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
-        tm.assert_frame_equal(result, expected)
-
-        # tuple selector with partial string match on date
-        result = df.loc[('2016-01-01', 'a'), :]
-        expected = df.iloc[[0, 3]]
-        tm.assert_frame_equal(result, expected)
-
-        # Slicing date on first level should break (of course)
-        with pytest.raises(KeyError):
-            df_swap.loc['2016-01-01']
-
-        # GH12685 (partial string with daily resolution or below)
-        dr = date_range('2013-01-01', periods=100, freq='D')
-        ix = MultiIndex.from_product([dr, ['a', 'b']])
-        df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix)
-
-        result = df.loc[idx['2013-03':'2013-03', :], :]
-        expected = df.iloc[118:180]
-        tm.assert_frame_equal(result, expected)
-
-    def test_rangeindex_fallback_coercion_bug(self):
-        # GH 12893
-        foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
-        bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
-        df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
-        df.index.names = ['fizz', 'buzz']
-
-        str(df)
-        expected = pd.DataFrame({'bar': np.arange(100),
-                                 'foo': np.arange(100)},
-                                index=pd.MultiIndex.from_product(
-                                    [range(10), range(10)],
-                                    names=['fizz', 'buzz']))
-        tm.assert_frame_equal(df, expected, check_like=True)
-
-        result = df.index.get_level_values('fizz')
-        expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
-        tm.assert_index_equal(result, expected)
-
-        result = df.index.get_level_values('buzz')
-        expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
-        tm.assert_index_equal(result, expected)
-
-    def test_dropna(self):
-        # GH 6194
-        idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5],
-                                         [1, 2, np.nan, np.nan, 5],
-                                         ['a', 'b', 'c', np.nan, 'e']])
-
-        exp = pd.MultiIndex.from_arrays([[1, 5],
-                                         [1, 5],
-                                         ['a', 'e']])
-        tm.assert_index_equal(idx.dropna(), exp)
-        tm.assert_index_equal(idx.dropna(how='any'), exp)
-
-        exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
-                                         [1, 2, np.nan, 5],
-                                         ['a', 'b', 'c', 'e']])
-        tm.assert_index_equal(idx.dropna(how='all'), exp)
-
-        msg = "invalid how option: xxx"
-        with tm.assert_raises_regex(ValueError, msg):
-            idx.dropna(how='xxx')
-
-    def test_unsortedindex(self):
-        # GH 11897
-        mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
-                                        ('x', 'b'), ('y', 'a'), ('z', 'b')],
-                                       names=['one', 'two'])
-        df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
-                          columns=['one', 'two'])
-
-        # GH 16734: not sorted, but no real slicing
-        result = df.loc(axis=0)['z', 'a']
-        expected = df.iloc[0]
-        tm.assert_series_equal(result, expected)
-
-        with pytest.raises(UnsortedIndexError):
-            df.loc(axis=0)['z', slice('a')]
-        df.sort_index(inplace=True)
-        assert len(df.loc(axis=0)['z', :]) == 2
-
-        with pytest.raises(KeyError):
-            df.loc(axis=0)['q', :]
-
-    def test_unsortedindex_doc_examples(self):
-        # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex  # noqa
-        dfm = DataFrame({'jim': [0, 0, 1, 1],
-                         'joe': ['x', 'x', 'z', 'y'],
-                         'jolie': np.random.rand(4)})
-
-        dfm = dfm.set_index(['jim', 'joe'])
-        with tm.assert_produces_warning(PerformanceWarning):
-            dfm.loc[(1, 'z')]
-
-        with pytest.raises(UnsortedIndexError):
-            dfm.loc[(0, 'y'):(1, 'z')]
-
-        assert not dfm.index.is_lexsorted()
-        assert dfm.index.lexsort_depth == 1
-
-        # sort it
-        dfm = dfm.sort_index()
-        dfm.loc[(1, 'z')]
-        dfm.loc[(0, 'y'):(1, 'z')]
-
-        assert dfm.index.is_lexsorted()
-        assert dfm.index.lexsort_depth == 2
-
-    def test_tuples_with_name_string(self):
-        # GH 15110 and GH 14848
-
-        li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
-        with pytest.raises(ValueError):
-            pd.Index(li, name='abc')
-        with pytest.raises(ValueError):
-            pd.Index(li, name='a')
-
-    def test_nan_stays_float(self):
-
-        # GH 7031
-        idx0 = pd.MultiIndex(levels=[["A", "B"], []],
-                             labels=[[1, 0], [-1, -1]],
-                             names=[0, 1])
-        idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
-                             labels=[[0], [0]],
-                             names=[0, 1])
-        idxm = idx0.join(idx1, how='outer')
-        assert pd.isna(idx0.get_level_values(1)).all()
-        # the following failed in 0.14.1
-        assert pd.isna(idxm.get_level_values(1)[:-1]).all()
-
-        df0 = pd.DataFrame([[1, 2]], index=idx0)
-        df1 = pd.DataFrame([[3, 4]], index=idx1)
-        dfm = df0 - df1
-        assert pd.isna(df0.index.get_level_values(1)).all()
-        # the following failed in 0.14.1
-        assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
-
-    def test_million_record_attribute_error(self):
-        # GH 18165
-        r = list(range(1000000))
-        df = pd.DataFrame({'a': r, 'b': r},
-                          index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
-
-        with tm.assert_raises_regex(AttributeError,
-                                    "'Series' object has no attribute 'foo'"):
-            df['a'].foo()
-
-    def test_duplicate_multiindex_labels(self):
-        # GH 17464
-        # Make sure that a MultiIndex with duplicate levels throws a ValueError
-        with pytest.raises(ValueError):
-            ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
-
-        # And that using set_levels with duplicate levels fails
-        ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
-                                      [1, 2, 1, 2, 3]])
-        with pytest.raises(ValueError):
-            ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
-                           inplace=True)
-
-    def test_multiindex_compare(self):
-        # GH 21149
-        # Ensure comparison operations for MultiIndex with nlevels == 1
-        # behave consistently with those for MultiIndex with nlevels > 1
-
-        midx = pd.MultiIndex.from_product([[0, 1]])
-
-        # Equality self-test: MultiIndex object vs self
-        expected = pd.Series([True, True])
-        result = pd.Series(midx == midx)
-        tm.assert_series_equal(result, expected)
-
-        # Greater than comparison: MultiIndex object vs self
-        expected = pd.Series([False, False])
-        result = pd.Series(midx > midx)
-        tm.assert_series_equal(result, expected)